Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
0f05495c
Unverified
Commit
0f05495c
authored
Dec 04, 2018
by
Scott Cyphers
Committed by
GitHub
Dec 04, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into master
parents
1b5340c4
244c9fcf
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
126 additions
and
282 deletions
+126
-282
external_cldnn.cmake
cmake/external_cldnn.cmake
+1
-1
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+39
-1
intelgpu_op_custom_kernels.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
+59
-256
intelgpu_op_custom_kernels.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
+2
-1
unit_test.manifest
src/ngraph/runtime/intelgpu/unit_test.manifest
+25
-23
No files found.
cmake/external_cldnn.cmake
View file @
0f05495c
...
...
@@ -22,7 +22,7 @@ include(ExternalProject)
#------------------------------------------------------------------------------
set
(
CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git
)
set
(
CLDNN_GIT_LABEL
df28d2861716cac7a6a9eff4e49e47162959a74
8
)
set
(
CLDNN_GIT_LABEL
02add7c4ce2baa81e2a32fa02d733dcc4f01310
8
)
set
(
BOOST_VERSION 1.64.0
)
set
(
OUT_DIR
${
EXTERNAL_PROJECTS_ROOT
}
/cldnn/out
)
...
...
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
0f05495c
...
...
@@ -30,6 +30,7 @@
#include <CPP/crop.hpp>
#include <CPP/data.hpp>
#include <CPP/eltwise.hpp>
#include <CPP/gemm.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/layout.hpp>
#include <CPP/lrn.hpp>
...
...
@@ -675,6 +676,41 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
arguments_check
(
op
,
2
,
1
);
const
shared_ptr
<
op
::
Dot
>
dot_inst
=
static_pointer_cast
<
op
::
Dot
>
(
op
);
const
size_t
axes_count
=
dot_inst
->
get_reduction_axes_count
();
const
Shape
&
input0_shape
=
get_input_shape
(
op
,
0
);
const
Shape
&
input1_shape
=
get_input_shape
(
op
,
1
);
const
size_t
input0_elem_count
=
shape_size
(
input0_shape
);
const
size_t
input1_elem_count
=
shape_size
(
input1_shape
);
if
(
get_input_type
(
op
)
==
element
::
f32
&&
get_input_type
(
op
,
1
)
==
element
::
f32
&&
get_output_type
(
op
)
==
element
::
f32
&&
input0_elem_count
&&
input1_elem_count
&&
(
axes_count
==
1
)
&&
(
input0_shape
.
size
()
<
3
)
&&
(
input1_shape
.
size
()
<
3
)
&&
!
input0_shape
.
empty
()
&&
!
input1_shape
.
empty
())
{
string
input1_name
=
get_input_name
(
op
,
1
);
// If we have A[5] and B[5] here, in cldnn we have A[1, 1, 1, 5] and B[1, 1, 1, 5]
// it needs to be reshaped into A[1, 1, 1, 5] and B[1, 1, 5, 1]
if
(
!
input0_shape
.
empty
()
&&
(
input1_shape
.
size
()
==
1
))
{
const
string
new_name
=
input1_name
+
"_reshaped"
;
Shape
new_shape
=
input1_shape
;
new_shape
.
push_back
(
1
);
const
cldnn
::
tensor
reshaped_tensor
=
intelgpu_space
::
create_cldnn_tensor
(
new_shape
);
const
cldnn
::
reshape
reshape_op
(
new_name
,
input1_name
,
reshaped_tensor
);
topology
.
add
(
reshape_op
);
input1_name
=
new_name
;
}
const
cldnn
::
gemm
dot_op
(
get_output_name
(
op
),
get_input_name
(
op
,
0
),
input1_name
);
topology
.
add
(
dot_op
);
}
else
{
do_dot_operation
(
topology
,
get_input_name
(
op
,
0
),
get_input_shape
(
op
,
0
),
...
...
@@ -682,7 +718,9 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
get_input_shape
(
op
,
1
),
get_output_name
(
op
),
get_output_shape
(
op
),
get_output_type
(
op
));
get_output_type
(
op
),
axes_count
);
}
break
;
}
case
OP_TYPEID
:
:
MaxPool
:
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
View file @
0f05495c
...
...
@@ -306,16 +306,6 @@ static string access_dims_strided(const Shape& dimentions,
return
buffer
;
}
static
void
do_dot_operation_error
(
const
Shape
&
input0_shape
,
const
Shape
&
input1_shape
,
const
Shape
&
output_shape
)
{
throw
invalid_argument
(
"IntelGPU Dot operation. Conbination input0_shape"
+
runtime
::
intelgpu
::
array_dims
(
input0_shape
)
+
", input1_shape"
+
runtime
::
intelgpu
::
array_dims
(
input1_shape
)
+
", output_shape"
+
runtime
::
intelgpu
::
array_dims
(
output_shape
)
+
" is not supported."
);
}
void
runtime
::
intelgpu
::
do_pad_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
...
...
@@ -742,82 +732,30 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
topology
.
add
(
op_avg_pool_backprop
);
}
static
void
do_1d_scalar_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
Shape
&
input0_shape
,
const
Shape
&
input1_shape
,
const
string
&
type_name
)
{
const
size_t
input0_count
=
input0_shape
.
empty
()
?
0
:
shape_size
<
Shape
>
(
input0_shape
);
const
size_t
input1_count
=
input1_shape
.
empty
()
?
0
:
shape_size
<
Shape
>
(
input1_shape
);
const
size_t
output_count
=
max
(
input0_count
,
input1_count
);
entry_point_name
+=
"_do_1d_scalar_mul"
;
writer
<<
"__kernel void "
<<
entry_point_name
<<
"(const __global "
<<
type_name
<<
"* input0"
<<
", const __global "
<<
type_name
<<
"* input1, __global "
<<
type_name
<<
"* output)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"for (uint i1 = 0; i1 < "
<<
output_count
<<
"; ++i1)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"output[i1] = input0["
<<
(
input0_count
>
0
?
"i1"
:
"0"
)
<<
"] * input1["
<<
(
input1_count
>
0
?
"i1"
:
"0"
)
<<
"];
\n
"
;
}
writer
.
block_end
();
}
writer
.
block_end
();
}
static
vector
<
size_t
>
do_2d_2d_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
void
runtime
::
intelgpu
::
do_dot_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input0_name
,
const
Shape
&
input0_shape
,
const
string
&
input1_name
,
const
Shape
&
input1_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
string
&
type_name
)
const
element
::
Type
&
output_type
,
size_t
reduction_axes_count
)
{
entry_point_name
+=
"_do_2d_2d_mul"
;
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
string
entry_point_name
=
"dot_"
+
output_name
;
const
string
type_name
=
get_opencl_type_name
(
output_type
);
const
size_t
input0_axes
=
input0_shape
.
size
()
-
reduction_axes_count
;
size_t
var_idx
=
reduction_axes_count
;
Shape
reduction_shape
;
codegen
::
CodeWriter
writer
;
vector
<
size_t
>
gws
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{
input0_shape
,
input1_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
for
(
auto
it
=
input1_shape
.
begin
();
(
it
!=
input1_shape
.
end
())
&&
(
var_idx
>
0
);
++
it
)
{
// Main loops
gws
=
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
true
);
// Inner loop
writer
<<
type_name
<<
" sum = 0;
\n
"
;
writer
<<
"for (uint i2 = 0; i2 < "
<<
input0_shape
.
at
(
1
)
<<
"; ++i2)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"sum += input0[i0][i2] * input1[i2][i1];
\n
"
;
}
writer
.
block_end
();
writer
<<
"output[i0][i1] = sum;
\n
"
;
// Closing brackets for main loops
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
reduction_shape
.
push_back
(
*
it
);
--
var_idx
;
}
writer
.
block_end
();
return
gws
;
}
static
vector
<
size_t
>
do_3d_3d_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
Shape
&
input0_shape
,
const
Shape
&
input1_shape
,
const
Shape
&
output_shape
,
const
string
&
type_name
)
{
entry_point_name
+=
"_do_3d_3d_mul"
;
vector
<
size_t
>
gws
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
...
...
@@ -825,219 +763,84 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
{
input0_shape
,
input1_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
{
// Main loops
gws
=
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
true
);
// Inner loop
writer
<<
type_name
<<
" sum = 0;
\n
"
;
writer
<<
"for (uint i4 = 0; i4 < "
<<
input0_shape
.
back
()
<<
"; ++i4)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"sum += input0[i0][i1][i4] * input1[i4][i2][i3];
\n
"
;
}
writer
.
block_end
();
writer
<<
"output[i0][i1][i2][i3] = sum;
\n
"
;
// Closing brackets for main loops
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
}
writer
.
block_end
();
return
gws
;
}
static
vector
<
size_t
>
do_3d_2d_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
Shape
&
input0_shape
,
const
Shape
&
input1_shape
,
const
Shape
&
output_shape
,
const
string
&
type_name
)
{
entry_point_name
+=
"_do_3d_2d_mul"
;
vector
<
size_t
>
gws
;
writer
<<
"// reduction_axes_count:"
<<
reduction_axes_count
<<
"
\n
"
<<
"// reduction_shape:"
<<
reduction_shape
<<
"
\n
"
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{
input0_shape
,
input1_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
{
// Main loops
gws
=
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
true
);
// Inner loop
writer
<<
type_name
<<
" sum = 0;
\n
"
;
writer
<<
"for (uint i3 = 0; i3 < "
<<
input0_shape
.
back
()
<<
"; ++i3)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"sum += input0[i0][i1][i3] * input1[i3][i2];
\n
"
;
}
writer
.
block_end
();
writer
<<
"output[i0][i1][i2] = sum;
\n
"
;
// Closing brackets for main loops
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
}
writer
.
block_end
();
return
gws
;
}
static
vector
<
size_t
>
do_2d_1d_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
Shape
&
input0_shape
,
const
Shape
&
input1_shape
,
const
Shape
&
output_shape
,
const
string
&
type_name
)
{
entry_point_name
+=
"_do_2d_1d_mul"
;
vector
<
size_t
>
gws
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{
input0_shape
,
input1_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
// Reduction loops
var_idx
=
0
;
for
(
auto
const
&
i
:
reduction_shape
)
{
// Main loops
gws
=
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
true
);
writer
<<
type_name
<<
" sum = 0;
\n
"
;
// Inner loop
writer
<<
"for (uint i1 = 0; i1 < "
<<
input0_shape
.
at
(
1
)
<<
"; ++i1)
\n
"
;
writer
<<
"for (uint k"
<<
var_idx
<<
" = 0; k"
<<
var_idx
<<
" < "
<<
i
<<
"; ++k"
<<
var_idx
<<
")
\n
"
;
writer
.
block_begin
();
{
writer
<<
"sum += input0[i0][i1] * input1[i1];
\n
"
;
}
writer
.
block_end
();
writer
<<
"output[i0] = sum;
\n
"
;
// Closing brackets for main loops
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
++
var_idx
;
}
writer
.
block_end
();
return
gws
;
}
static
void
do_scalar_scalar_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
string
&
type_name
)
{
entry_point_name
+=
"_scalar_scalar_mul"
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{{
1
},
{
1
}},
type_name
,
{
1
});
writer
<<
"sum += input0"
;
writer
.
block_begin
();
if
(
input0_shape
.
empty
())
{
writer
<<
"output[0] = input0[0] * input1[0];
\n
"
;
writer
<<
"[0]
"
;
}
writer
.
block_end
();
}
static
void
do_1d_1d_mul
(
codegen
::
CodeWriter
&
writer
,
string
&
entry_point_name
,
const
Shape
&
shape
,
const
string
&
type_name
)
{
if
(
shape
.
size
()
>
1
)
else
{
// main axes indexes
for
(
size_t
i
=
0
;
i
<
input0_axes
;
++
i
)
{
throw
invalid_argument
(
"do_1d_1d_mul: Shape"
+
runtime
::
intelgpu
::
array_dims
(
shape
)
+
" must be 1D"
);
writer
<<
"[i"
<<
i
<<
"]"
;
}
entry_point_name
+=
"_do_1d_1d_mul"
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{
2
,
shape
},
type_name
,
{
1
});
writer
.
block_begin
();
{
writer
<<
type_name
<<
" sum = 0;
\n
"
<<
"for (uint i = 0; i < "
<<
shape
.
front
()
<<
"; ++i)
\n
"
;
writer
.
block_begin
();
// reduction axes indexes
for
(
size_t
i
=
0
;
i
<
reduction_shape
.
size
();
++
i
)
{
writer
<<
"sum += input0[i] * input1[i];
\n
"
;
writer
<<
"[k"
<<
i
<<
"]
"
;
}
writer
.
block_end
();
writer
<<
"output[0] = sum;
\n
"
;
}
writer
.
block_end
();
}
void
runtime
::
intelgpu
::
do_dot_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input0_name
,
const
Shape
&
input0_shape
,
const
string
&
input1_name
,
const
Shape
&
input1_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
)
{
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
string
entry_point_name
=
"dot_"
+
output_name
;
const
string
type_name
=
get_opencl_type_name
(
output_type
);
codegen
::
CodeWriter
writer
;
vector
<
size_t
>
gws
=
{
1
};
const
bool
is_input0_scalar
=
input0_shape
.
empty
();
const
bool
is_input1_scalar
=
input1_shape
.
empty
();
const
bool
is_output_scalar
=
output_shape
.
empty
();
// operation
writer
<<
" * input1"
;
if
(
is_input0_scalar
&&
is_input1_scalar
&&
is_output_scalar
)
{
do_scalar_scalar_mul
(
writer
,
entry_point_name
,
type_name
);
}
else
if
(((
is_input0_scalar
&&
!
is_input1_scalar
)
||
(
!
is_input0_scalar
&&
is_input1_scalar
))
&&
!
is_output_scalar
)
if
(
input1_shape
.
empty
())
{
do_1d_scalar_mul
(
writer
,
entry_point_name
,
input0_shape
,
input1_shape
,
type_name
)
;
writer
<<
"[0]"
;
}
else
if
(
!
is_input0_scalar
&&
!
is_input1_scalar
&&
is_output_scalar
)
{
do_1d_1d_mul
(
writer
,
entry_point_name
,
input1_shape
,
type_name
);
}
else
if
(
!
is_input0_scalar
&&
!
is_input1_scalar
&&
!
is_output_scalar
)
{
if
(
input0_shape
.
size
()
==
2
&&
input1_shape
.
size
()
==
1
)
else
{
gws
=
do_2d_1d_mul
(
writer
,
entry_point_name
,
input0_shape
,
input1_shape
,
output_shape
,
type_name
);
}
else
if
(
input0_shape
.
size
()
==
2
&&
input1_shape
.
size
()
==
2
)
// reduction axes indexes
for
(
size_t
i
=
0
;
i
<
reduction_shape
.
size
();
++
i
)
{
gws
=
do_2d_2d_mul
(
writer
,
entry_point_name
,
input0_shape
,
input1_shape
,
output_shape
,
type_name
);
writer
<<
"[k"
<<
i
<<
"]"
;
}
else
if
(
input0_shape
.
size
()
==
3
&&
input1_shape
.
size
()
==
3
)
// main axes indexes
for
(
size_t
i
=
input0_axes
;
i
<
output_shape
.
size
();
++
i
)
{
gws
=
do_3d_3d_mul
(
writer
,
entry_point_name
,
input0_shape
,
input1_shape
,
output_shape
,
type_name
);
writer
<<
"[i"
<<
i
<<
"]"
;
}
else
if
(
input0_shape
.
size
()
==
3
&&
input1_shape
.
size
()
==
2
)
{
gws
=
do_3d_2d_mul
(
writer
,
entry_point_name
,
input0_shape
,
input1_shape
,
output_shape
,
type_name
);
}
else
writer
<<
";
\n
"
;
// Closing brackets for reduction loops
for
(
auto
const
&
i
:
reduction_shape
)
{
do_dot_operation_error
(
input0_shape
,
input1_shape
,
output_shape
);
}
writer
.
block_end
();
}
else
{
do_dot_operation_error
(
input0_shape
,
input1_shape
,
output_shape
);
writer
<<
"output"
<<
runtime
::
intelgpu
::
access_dims
(
output_shape
)
<<
" = sum;
\n
"
;
// Closing brackets for main loops
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
}
writer
.
block_end
();
const
cldnn
::
custom_gpu_primitive
op_dot
(
output_name
,
{
input0_name
,
input1_name
},
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
View file @
0f05495c
...
...
@@ -72,7 +72,8 @@ namespace ngraph
const
Shape
&
inputB_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
);
const
element
::
Type
&
output_type
,
size_t
reduction_axes_count
);
void
do_slice_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
...
...
src/ngraph/runtime/intelgpu/unit_test.manifest
View file @
0f05495c
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded
backwards_batch_norm_three_outputs
backwards_batch_norm_training
backwards_dot_scalar_tensor
backwards_dot_tensor3_tensor3
...
...
@@ -9,7 +10,6 @@ backwards_dot_tensor_vector
backwards_exp
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_relu
backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
...
...
@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64
batch_norm_one_output
batch_norm_three_outputs
dequantize
dequantize_zero_offset
dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32
dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32
dot_3d_multi_axis
dot_4d_5d_multi_axis
dot_4d_5d_multi_axis_more
generate_mask
function_call
generate_mask
max_pool_3d
maxpool_bprop_larger_than_cache
numeric_double_inf
numeric_double_nan
quantize
quantize_zero_offset
quantize_axes
quantize_int8
quantize_int8_zero_offset
quantize_clamp_int32
quantize_clamp_int8
quantize_clamp_uint8
quantize_int32
quantize_int32_zero_offset
quantize_clamp_uint8
quantize_clamp_int8
quantize_clamp_int32
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_int8
quantize_int8_zero_offset
quantize_ROUND_DOWN
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_
ROUND_DOWN
quantize_
zero_offset
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
...
...
@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
select_and_scatter_with_overlap
shape_of_5d
shape_of_matrix
shape_of_scalar
shape_of_vector
softmax_axis_3d_double
sum_stable_acc
topk_1d_max_all
topk_1d_max_one
topk_1d_max_partial
...
...
@@ -88,17 +89,17 @@ topk_2d_max_partial
topk_2d_min_all
topk_2d_min_one
topk_2d_min_partial
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_max_all
topk_3d_max_one
topk_3d_max_partial
topk_3d_min_all
topk_3d_min_one
topk_3d_min_partial
topk_3d_single_output
topk_5d_max_partial
topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_single_output
zero_sized_abs
zero_sized_acos
zero_sized_add
...
...
@@ -145,3 +146,4 @@ max_3d_to_scalar_double
argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double
maxpool_bprop_larger_than_cache
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment