Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
ab9fad24
Commit
ab9fad24
authored
Apr 18, 2019
by
Sergey Shalnov
Committed by
Robert Kimball
Apr 18, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
IntelGPU backend: Custom kernels refactoring 2 (#2770)
parent
6f0c8190
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
203 additions
and
412 deletions
+203
-412
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+8
-88
intelgpu_kernels.hpp
src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp
+16
-0
intelgpu_op_broadcast.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
+121
-125
intelgpu_op_broadcast.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp
+0
-63
intelgpu_op_custom_func_call.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.cpp
+33
-23
intelgpu_op_custom_func_call.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp
+0
-41
intelgpu_op_softmax.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.cpp
+25
-32
intelgpu_op_softmax.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp
+0
-40
No files found.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
ab9fad24
...
...
@@ -53,10 +53,7 @@
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/runtime/intelgpu/visualize_tree.hpp"
...
...
@@ -671,14 +668,7 @@ shared_ptr<runtime::Executable>
if
((
shape_dim_count
>
3
)
||
((
shape_dim_count
==
3
)
&&
(
axes_size
==
2
))
||
(
op
->
get_input_element_type
(
0
)
!=
element
::
f32
))
{
do_softmax_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_input_element_type
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axes
);
kern
.
emit
<
op
::
Softmax
>
(
softmax_op
);
}
else
{
...
...
@@ -979,15 +969,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_input_element_type
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
true
);
kern
.
emit
<
op
::
Broadcast
>
(
broadcast
);
}
break
;
}
...
...
@@ -1005,15 +987,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_input_element_type
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
false
);
kern
.
emit
<
op
::
Sum
>
(
sum
);
}
break
;
}
...
...
@@ -1031,13 +1005,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_product_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
);
kern
.
emit
<
op
::
Product
>
(
prod
);
}
break
;
}
...
...
@@ -1098,44 +1066,16 @@ shared_ptr<runtime::Executable>
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
All
>
all_op
=
static_pointer_cast
<
op
::
All
>
(
op
);
const
AxisSet
&
axis
=
all_op
->
get_reduction_axes
();
const
shared_ptr
<
Node
>
def_val
=
all_op
->
get_default_value
();
const
shared_ptr
<
op
::
Constant
>
def_const
=
static_pointer_cast
<
op
::
Constant
>
(
def_val
);
const
vector
<
std
::
string
>&
values
=
def_const
->
get_value_strings
();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
"lhs && rhs"
,
values
.
at
(
0
));
kern
.
emit
<
op
::
All
>
(
static_pointer_cast
<
op
::
All
>
(
op
));
break
;
}
case
OP_TYPEID
:
:
Any
:
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
Any
>
any_op
=
static_pointer_cast
<
op
::
Any
>
(
op
);
const
AxisSet
&
axis
=
any_op
->
get_reduction_axes
();
const
shared_ptr
<
Node
>
def_val
=
any_op
->
get_default_value
();
const
shared_ptr
<
op
::
Constant
>
def_const
=
static_pointer_cast
<
op
::
Constant
>
(
def_val
);
const
vector
<
std
::
string
>&
values
=
def_const
->
get_value_strings
();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
"lhs || rhs"
,
values
.
at
(
0
));
kern
.
emit
<
op
::
Any
>
(
static_pointer_cast
<
op
::
Any
>
(
op
));
break
;
}
case
OP_TYPEID
:
:
ReluBackprop
:
...
...
@@ -1788,34 +1728,14 @@ shared_ptr<runtime::Executable>
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
Min
>
min_op
=
static_pointer_cast
<
op
::
Min
>
(
op
);
const
AxisSet
&
axis
=
min_op
->
get_reduction_axes
();
do_max_min_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
true
);
kern
.
emit
<
op
::
Min
>
(
static_pointer_cast
<
op
::
Min
>
(
op
));
break
;
}
case
OP_TYPEID
:
:
Max
:
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
Max
>
max_op
=
static_pointer_cast
<
op
::
Max
>
(
op
);
const
AxisSet
&
axis
=
max_op
->
get_reduction_axes
();
do_max_min_operation
(
topology
,
op
->
get_input_tensor_name
(
0
),
op
->
get_input_shape
(
0
),
op
->
get_output_tensor_name
(
0
),
op
->
get_output_shape
(
0
),
op
->
get_output_element_type
(
0
),
axis
,
false
);
kern
.
emit
<
op
::
Max
>
(
static_pointer_cast
<
op
::
Max
>
(
op
));
break
;
}
case
OP_TYPEID
:
:
OneHot
:
...
...
src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp
View file @
ab9fad24
...
...
@@ -23,9 +23,17 @@
#include <CPP/topology.hpp>
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
namespace
ngraph
{
...
...
@@ -98,11 +106,19 @@ public:
private
:
void
queue_krnl
(
const
krnl_info
&
krn_info
,
const
std
::
shared_ptr
<
Node
>&
op
);
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
All
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Any
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Broadcast
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Convolution
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
ConvolutionBackpropData
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
ConvolutionBackpropFilters
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Max
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Min
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Product
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Select
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Slice
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Softmax
>&
op
)
const
;
krnl_info
build_krnl
(
const
std
::
shared_ptr
<
op
::
Sum
>&
op
)
const
;
cldnn
::
topology
&
stream
;
size_t
m_count_krnls
;
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
View file @
ab9fad24
...
...
@@ -14,32 +14,27 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/concatenation.hpp>
#include <CPP/custom_gpu_primitive.hpp>
#include <CPP/reshape.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/util.hpp"
using
namespace
std
;
using
namespace
ngraph
;
static
void
do_sum_to_scalar_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
)
using
namespace
ngraph
::
runtime
::
intelgpu
;
static
CustomKernels
::
krnl_info
do_sum_to_scalar_operation
(
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
)
{
const
string
function_name
=
"sum_to_scalar_"
+
output_name
;
const
string
input_type_str
=
runtime
::
intelgpu
::
get_opencl_type_name
(
input_type
);
const
string
output_type_str
=
runtime
::
intelgpu
::
get_opencl_type_name
(
output_type
);
const
string
input_type_str
=
get_opencl_type_name
(
input_type
);
const
string
output_type_str
=
get_opencl_type_name
(
output_type
);
const
size_t
main_loop_count
=
shape_size
(
input_shape
);
const
size_t
vect_channels
=
32
;
CodeWriter
writer
;
...
...
@@ -53,10 +48,8 @@ static void do_sum_to_scalar_operation(cldnn::topology& topology,
writer
.
block_begin
();
{
// Main function body
writer
<<
"// input array dims: input0"
<<
runtime
::
intelgpu
::
array_dims
(
input_shape
)
<<
"
\n
"
<<
"// output array dims: output"
<<
runtime
::
intelgpu
::
array_dims
(
output_shape
)
<<
"
\n
"
writer
<<
"// input array dims: input0"
<<
array_dims
(
input_shape
)
<<
"
\n
"
<<
"// output array dims: output"
<<
array_dims
(
output_shape
)
<<
"
\n
"
<<
output_type_str
<<
" result = 0.0f;
\n
"
<<
"const uint id = get_sub_group_local_id();
\n
"
<<
"uint element_id = id;
\n
"
...
...
@@ -66,82 +59,80 @@ static void do_sum_to_scalar_operation(cldnn::topology& topology,
{
writer
<<
"result += input0[element_id];
\n
"
<<
"element_id += "
<<
vect_channels
<<
";
\n
"
;
writer
.
block_end
();
}
writer
.
block_end
();
writer
<<
"if (element_id < "
<<
main_loop_count
<<
")
\n
"
;
writer
.
block_begin
();
{
writer
<<
"result += input0[element_id];
\n
"
;
}
writer
.
block_end
();
writer
<<
"if (element_id < "
<<
main_loop_count
<<
")
\n
"
;
writer
.
block_begin
();
{
writer
<<
"result += input0[element_id];
\n
"
;
}
writer
.
block_end
();
writer
<<
output_type_str
<<
" sub_group_result = sub_group_reduce_add(result);
\n
"
;
writer
<<
output_type_str
<<
" sub_group_result = sub_group_reduce_add(result);
\n
"
;
writer
<<
"if (id == 0)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"*output = sub_group_result;
\n
"
;
}
writer
.
block_end
();
}
// End of function bracket
writer
<<
"if (id == 0)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"*output = sub_group_result;
\n
"
;
}
writer
.
block_end
();
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
runtime
::
intelgpu
::
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_bcast_sum
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
runtime
::
intelgpu
::
get_kernel_args
(
1
,
1
),
""
,
layout
,
gws
,
lws
);
topology
.
add
(
op_bcast_sum
);
}
const
CustomKernelInfo
op_bcast_sum
(
output_name
,
output_shape
,
output_type
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
gws
,
lws
);
return
{
op_bcast_sum
};
}
void
runtime
::
intelgpu
::
do_bcast_sum_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_bcast
)
// This implements Broadcast and Sum nGraph operations.
// input_shape (bcast) or output_shape (sum) can be empty.
// If the shape is empty it means scalar
static
CustomKernels
::
krnl_info
do_bcast_sum_operation
(
const
shared_ptr
<
Node
>&
op
,
const
AxisSet
&
axis
,
bool
is_bcast
)
{
const
string
&
input_name
=
op
->
get_input_tensor_name
(
0
);
const
Shape
&
input_shape
=
op
->
get_input_shape
(
0
);
const
element
::
Type
&
input_type
=
op
->
get_input_element_type
(
0
);
const
string
&
output_name
=
op
->
get_output_tensor_name
(
0
);
const
Shape
&
output_shape
=
op
->
get_output_shape
(
0
);
const
element
::
Type
&
output_type
=
op
->
get_output_element_type
(
0
);
string
function_name
=
is_bcast
?
"broadcast_"
:
"sum_"
;
function_name
+=
output_name
;
CodeWriter
writer
;
vector
<
size_t
>
gws
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
input_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
input_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
writer
.
block_begin
();
{
if
(
is_bcast
)
{
// Broadcast loops
gws
=
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
true
);
gws
=
generate_loops
(
writer
,
output_shape
,
true
);
writer
<<
"output"
<<
access_dims
(
output_shape
)
<<
" = input0"
<<
access_dims
(
output_shape
,
"i"
,
axis
)
<<
";
\n
"
;
// Closing brackets for Broadcast loop
runtime
::
intelgpu
::
generate_loops
(
writer
,
output_shape
,
false
);
generate_loops
(
writer
,
output_shape
,
false
);
}
else
{
// corner case with scalar
if
(
output_shape
.
empty
()
||
(
!
output_shape
.
empty
()
&&
(
shape_size
(
output_shape
)
==
1
)))
{
return
do_sum_to_scalar_operation
(
topology
,
input_name
,
return
do_sum_to_scalar_operation
(
input_name
,
input_shape
,
input_type
,
output_name
,
...
...
@@ -171,39 +162,38 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_bcast_sum
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
get_kernel_args
(
1
,
1
),
""
,
layout
,
gws
);
topology
.
add
(
op_bcast_sum
);
const
CustomKernelInfo
op_bcast_sum
(
output_name
,
output_shape
,
output_type
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
gws
);
return
{
op_bcast_sum
};
}
void
runtime
::
intelgpu
::
do_max_min_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_min
)
// This implements Min and Max operations depends on is_min parameter
static
CustomKernels
::
krnl_info
do_max_min_operation
(
const
shared_ptr
<
op
::
util
::
ArithmeticReduction
>&
op
,
bool
is_min
)
{
const
string
&
input_name
=
op
->
get_input_tensor_name
(
0
);
const
Shape
&
input_shape
=
op
->
get_input_shape
(
0
);
const
string
&
output_name
=
op
->
get_output_tensor_name
(
0
);
const
Shape
&
output_shape
=
op
->
get_output_shape
(
0
);
const
element
::
Type
&
output_type
=
op
->
get_output_element_type
(
0
);
const
AxisSet
&
axis
=
op
->
get_reduction_axes
();
const
string
function_name
=
"min_max_"
+
output_name
;
const
size_t
input_size
=
shape_size
<
Shape
>
(
input_shape
);
const
string
&
init_value
=
get_opencl_type_min_max_value
(
output_type
,
!
is_min
);
const
string
&
operation
=
is_min
?
" < "
:
" > "
;
CodeWriter
writer
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
output_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
output_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
writer
.
block_begin
();
{
...
...
@@ -255,36 +245,29 @@ void runtime::intelgpu::do_max_min_operation(cldnn::topology& topology,
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_min_max
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
get_kernel_args
(
1
,
1
),
""
,
layout
,
{
1
});
topology
.
add
(
op_min_max
);
const
CustomKernelInfo
op_min_max
(
output_name
,
output_shape
,
output_type
,
{
input_name
},
{
writer
.
get_code
()},
function_name
);
return
{
op_min_max
};
}
void
runtime
::
intelgpu
::
do_product_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
)
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Product
>&
op
)
const
{
const
string
&
input_name
=
op
->
get_input_tensor_name
(
0
);
const
Shape
&
input_shape
=
op
->
get_input_shape
(
0
);
const
string
&
output_name
=
op
->
get_output_tensor_name
(
0
);
const
Shape
&
output_shape
=
op
->
get_output_shape
(
0
);
const
element
::
Type
&
output_type
=
op
->
get_output_element_type
(
0
);
const
AxisSet
&
axis
=
op
->
get_reduction_axes
();
const
string
function_name
=
"product_"
+
output_name
;
const
size_t
input_size
=
shape_size
<
Shape
>
(
input_shape
);
CodeWriter
writer
;
runtime
::
intelgpu
::
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
output_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
gen_func_def
(
writer
,
function_name
,
{
get_opencl_type_name
(
output_type
)},
{
input_shape
},
get_opencl_type_name
(
output_type
),
output_shape
);
writer
.
block_begin
();
{
...
...
@@ -330,14 +313,27 @@ void runtime::intelgpu::do_product_operation(cldnn::topology& topology,
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_product
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
get_kernel_args
(
1
,
1
),
""
,
layout
,
{
1
});
topology
.
add
(
op_product
);
const
CustomKernelInfo
op_product
(
output_name
,
output_shape
,
output_type
,
{
input_name
},
{
writer
.
get_code
()},
function_name
);
return
{
op_product
};
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Broadcast
>&
op
)
const
{
return
do_bcast_sum_operation
(
op
,
op
->
get_broadcast_axes
(),
true
);
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Sum
>&
op
)
const
{
return
do_bcast_sum_operation
(
op
,
op
->
get_reduction_axes
(),
false
);
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Max
>&
op
)
const
{
return
do_max_min_operation
(
op
,
false
);
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Min
>&
op
)
const
{
return
do_max_min_operation
(
op
,
true
);
}
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp
deleted
100644 → 0
View file @
6f0c8190
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace
ngraph
{
namespace
runtime
{
namespace
intelgpu
{
// This implements Broadcast and Sum nGraph operations.
// input_shape (bcast) or output_shape (sum) can be empty.
// If the shape is empty it means scalar
void
do_bcast_sum_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_bcast
);
// This implements Min and Max operations depends on is_min parameter
void
do_max_min_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_min
);
// This implements Product operation
void
do_product_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
);
}
}
}
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.cpp
View file @
ab9fad24
...
...
@@ -14,26 +14,29 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/op/constant.hpp"
using
namespace
std
;
using
namespace
ngraph
;
using
namespace
ngraph
::
runtime
::
intelgpu
;
void
runtime
::
intelgpu
::
do_all_any_op
(
cldnn
::
topology
&
topology
,
const
string
&
input0_name
,
const
Shape
&
input0_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
const
std
::
string
&
operation
,
const
std
::
string
&
init_val
)
static
CustomKernels
::
krnl_info
do_all_any_op
(
const
shared_ptr
<
op
::
util
::
LogicalReduction
>&
op
,
const
string
&
operation
)
{
const
string
&
input0_name
=
op
->
get_input_tensor_name
(
0
);
const
Shape
&
input0_shape
=
op
->
get_input_shape
(
0
);
const
string
&
output_name
=
op
->
get_output_tensor_name
(
0
);
const
Shape
&
output_shape
=
op
->
get_output_shape
(
0
);
const
element
::
Type
&
output_type
=
op
->
get_output_element_type
(
0
);
const
AxisSet
&
axis
=
op
->
get_reduction_axes
();
const
shared_ptr
<
Node
>
def_val
=
op
->
get_default_value
();
const
shared_ptr
<
op
::
Constant
>
def_const
=
static_pointer_cast
<
op
::
Constant
>
(
def_val
);
const
vector
<
string
>&
values
=
def_const
->
get_value_strings
();
const
string
&
init_val
=
values
.
at
(
0
);
const
string
entry_point_name
=
"custom_op_all_any_"
+
output_name
;
const
string
kernel_type_name
=
get_opencl_type_name
(
output_type
);
const
size_t
input_size
=
shape_size
<
Shape
>
(
input0_shape
);
...
...
@@ -94,14 +97,21 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_all_any
(
output_name
,
{
input0_name
},
{
writer
.
get_code
()},
entry_point_name
,
get_kernel_args
(
1
,
1
),
""
,
layout
,
{
1
});
topology
.
add
(
op_all_any
);
const
CustomKernelInfo
krn_ret
(
output_name
,
output_shape
,
output_type
,
{
input0_name
},
{
writer
.
get_code
()},
entry_point_name
);
return
{
krn_ret
};
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
All
>&
op
)
const
{
return
do_all_any_op
(
op
,
"lhs && rhs"
);
}
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Any
>&
op
)
const
{
return
do_all_any_op
(
op
,
"lhs || rhs"
);
}
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp
deleted
100644 → 0
View file @
6f0c8190
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace
ngraph
{
namespace
runtime
{
namespace
intelgpu
{
void
do_all_any_op
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input0_name
,
const
Shape
&
input0_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
const
std
::
string
&
operation
,
const
std
::
string
&
init_val
);
}
}
}
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.cpp
View file @
ab9fad24
...
...
@@ -14,15 +14,13 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_
layout
.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_
kernels
.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
using
namespace
std
;
using
namespace
ngraph
;
using
namespace
ngraph
::
runtime
::
intelgpu
;
static
Shape
shape_dims
(
const
Shape
&
dimentions
,
const
AxisSet
&
axis
=
{})
{
...
...
@@ -45,22 +43,20 @@ static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
return
output_shape
;
}
void
runtime
::
intelgpu
::
do_softmax_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axes
)
CustomKernels
::
krnl_info
CustomKernels
::
build_krnl
(
const
shared_ptr
<
op
::
Softmax
>&
op
)
const
{
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
string
&
input_name
=
op
->
get_input_tensor_name
(
0
);
const
Shape
&
input_shape
=
op
->
get_input_shape
(
0
);
const
element
::
Type
&
input_type
=
op
->
get_input_element_type
(
0
);
const
string
&
output_name
=
op
->
get_output_tensor_name
(
0
);
const
Shape
&
output_shape
=
op
->
get_output_shape
(
0
);
const
element
::
Type
&
output_type
=
op
->
get_output_element_type
(
0
);
const
AxisSet
&
axes
=
op
->
get_axes
();
const
string
entry_point_name
=
"softmax_"
+
output_name
;
const
string
middle_name
=
entry_point_name
+
"_middle"
;
const
string
entry_point_middle_name
=
"softmax_middle_"
+
output_name
;
const
string
expression
=
"output"
+
access_dims
(
input_shape
,
"i"
,
axes
)
+
" = 0.0f;
\n
"
;
const
Shape
new_shape
=
shape_dims
(
output_shape
,
axes
);
const
cldnn
::
layout
layout_middle
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
new_shape
);
CodeWriter
writer0
;
CodeWriter
writer1
;
vector
<
size_t
>
gws
;
...
...
@@ -81,15 +77,13 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer0
.
block_end
();
const
cldnn
::
custom_gpu_primitive
op_softmax_middle
(
middle_name
,
{
input_name
},
{
writer0
.
get_code
()},
entry_point_middle_name
,
get_kernel_args
(
1
,
1
),
""
,
layout_middle
,
gws
);
topology
.
add
(
op_softmax_middle
);
const
CustomKernelInfo
op_softmax_middle
(
middle_name
,
new_shape
,
output_type
,
{
input_name
},
{
writer0
.
get_code
()},
entry_point_middle_name
,
gws
);
writer1
<<
"__kernel void "
<<
entry_point_name
<<
"(const __global "
<<
get_opencl_type_name
(
input_type
)
<<
" input0"
<<
array_dims
(
input_shape
)
...
...
@@ -107,13 +101,12 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer1
.
block_end
();
const
cldnn
::
custom_gpu_primitive
op_softmax
(
output_name
,
{
input_name
,
middle_name
},
{
writer1
.
get_code
()},
entry_point_name
,
get_kernel_args
(
2
,
1
),
""
,
layout
,
gws
);
topology
.
add
(
op_softmax
);
const
CustomKernelInfo
op_softmax
(
output_name
,
output_shape
,
output_type
,
{
input_name
,
middle_name
},
{
writer1
.
get_code
()},
entry_point_name
,
gws
);
return
{
op_softmax_middle
,
op_softmax
};
}
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp
deleted
100644 → 0
View file @
6f0c8190
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace
ngraph
{
namespace
runtime
{
namespace
intelgpu
{
void
do_softmax_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
element
::
Type
&
input_type
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
AxisSet
&
axes
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment