ngraph - Commit 67248fdb
authored Apr 19, 2019 by Sergey Shalnov, committed by Robert Kimball on Apr 19, 2019
IntelGPU backend: Custom kernels refactoring 3 (#2787)
parent 2b13ae40
Showing 7 changed files with 334 additions and 375 deletions.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp            +29  -139
src/ngraph/runtime/intelgpu/intelgpu_kernels.cpp            +12    -0
src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp            +22    -0
src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.cpp      +214  -117
src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp        +0   -88
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp  +57   -20
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp   +0   -11
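The overall shape of the refactoring, sketched below in stand-alone C++ (the struct and function names here are simplified stand-ins, not the real nGraph/clDNN types): instead of each operation handler building a cldnn::custom_gpu_primitive and adding it to the topology itself, a per-op build_krnl step now only describes the kernel and returns a list of descriptors (krnl_info), and a single queue step submits them.

// Self-contained sketch of the "describe kernels, queue them later" pattern.
// KernelInfo/krnl_info stand in for runtime::intelgpu::CustomKernelInfo and
// CustomKernels::krnl_info; they are illustrative, not the real definitions.
#include <iostream>
#include <string>
#include <vector>

struct KernelInfo
{
    std::string output;               // name of the output buffer
    std::vector<std::string> inputs;  // names of the input buffers
    std::string source;               // generated OpenCL-like source text
};
using krnl_info = std::vector<KernelInfo>;

// "build" step: describe the kernel only, do not touch any topology.
krnl_info build_logic_kernel(const std::string& a, const std::string& b,
                             const std::string& out, const std::string& op)
{
    return {{out, {a, b}, out + "[i] = " + a + "[i]" + op + b + "[i];"}};
}

// "queue" step: the one place that actually submits kernels.
void queue(const krnl_info& kernels)
{
    for (const KernelInfo& k : kernels)
        std::cout << "queueing kernel for " << k.output << ": " << k.source << "\n";
}

int main()
{
    queue(build_logic_kernel("input0", "input1", "output0", " > ")); // Greater
}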
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
@@ -52,7 +52,6 @@
 #include "ngraph/runtime/intelgpu/intelgpu_executable.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
 #include "ngraph/runtime/intelgpu/visualize_tree.hpp"
@@ -61,6 +60,7 @@
 #include "ngraph/function.hpp"
 #include "ngraph/node.hpp"
 #include "ngraph/op/all.hpp"
+#include "ngraph/op/and.hpp"
 #include "ngraph/op/any.hpp"
 #include "ngraph/op/argmax.hpp"
 #include "ngraph/op/argmin.hpp"
@@ -73,13 +73,20 @@
 #include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/embedding_lookup.hpp"
+#include "ngraph/op/equal.hpp"
 #include "ngraph/op/erf.hpp"
 #include "ngraph/op/get_output_element.hpp"
+#include "ngraph/op/greater.hpp"
+#include "ngraph/op/greater_eq.hpp"
+#include "ngraph/op/less.hpp"
+#include "ngraph/op/less_eq.hpp"
 #include "ngraph/op/lrn.hpp"
 #include "ngraph/op/max.hpp"
 #include "ngraph/op/max_pool.hpp"
 #include "ngraph/op/min.hpp"
+#include "ngraph/op/not_equal.hpp"
 #include "ngraph/op/one_hot.hpp"
+#include "ngraph/op/or.hpp"
 #include "ngraph/op/pad.hpp"
 #include "ngraph/op/product.hpp"
 #include "ngraph/op/quantize.hpp"
@@ -129,25 +136,13 @@ static OP_TYPEID get_typeid(const string& s)
     return it->second;
 }
 
-static void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
-{
-    if (op->get_input_size() != input || op->get_output_size() != output)
-    {
-        ostringstream os;
-        os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
-           << " Expected input size=" << input << ", provided=" << op->get_input_size()
-           << ". Expected output size=" << output << ", provided=" << op->get_output_size();
-        throw invalid_argument(os.str());
-    }
-}
-
 static void do_eltwise_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
                                  const string& custom_op,
                                  bool function_operation,
                                  cldnn::eltwise_mode mode)
 {
-    arguments_check(op, 2, 1);
+    runtime::intelgpu::arguments_check(op, 2, 1);
 
     if (op->get_input_element_type(0) != element::f32 ||
         op->get_input_element_type(1) != element::f32 ||
@@ -180,7 +175,7 @@ static void do_cldnn_unary(cldnn::topology& topology,
                            cldnn_activation_func mode,
                            const cldnn_activation_additional_params& param = {0.f, 0.f})
 {
-    arguments_check(op, 1, 1);
+    runtime::intelgpu::arguments_check(op, 1, 1);
 
     const cldnn::activation cldnn_unary(
         op->get_output_tensor_name(0), op->get_input_tensor_name(0), mode, param);
@@ -190,7 +185,7 @@ static void do_cldnn_unary(cldnn::topology& topology,
 static void do_custom_unary(cldnn::topology& topology,
                             const shared_ptr<Node>& op,
                             const string& operation)
 {
-    arguments_check(op, 1, 1);
+    runtime::intelgpu::arguments_check(op, 1, 1);
 
     runtime::intelgpu::do_custom_unary_operation(topology,
                                                  op->get_input_tensor_name(0),
@@ -209,7 +204,7 @@ static void do_universal_unary(cldnn::topology& topology,
                                bool force_custom = false,
                                const cldnn_activation_additional_params& param = {0.f, 0.f})
 {
-    arguments_check(op, 1, 1);
+    runtime::intelgpu::arguments_check(op, 1, 1);
 
     if (force_custom || (op->get_input_element_type(0) != element::f32))
     {
@@ -228,7 +223,7 @@ static void do_pooling_operation(cldnn::topology& topology,
                                  const Shape& pad_below,
                                  const cldnn::pooling_mode mode)
 {
-    arguments_check(op, 1, 1);
+    runtime::intelgpu::arguments_check(op, 1, 1);
 
     const cldnn::tensor output_size = intelgpu_space::create_cldnn_tensor(op->get_output_shape(0));
     const cldnn::tensor input_offset = intelgpu_space::create_cldnn_offset(pad_below);
@@ -245,22 +240,12 @@ static void do_pooling_operation(cldnn::topology& topology,
     topology.add(cldnn_pooling);
 }
 
-static void do_logical_operation(cldnn::topology& topology,
-                                 const shared_ptr<Node>& op,
-                                 const string& operation)
+template <typename OP>
+static void do_logical_operation(runtime::intelgpu::CustomKernels& kern,
+                                 const shared_ptr<Node>& op)
 {
-    arguments_check(op, 2, 1);
+    runtime::intelgpu::arguments_check(op, 2, 1);
 
-    runtime::intelgpu::do_logic_kernel(topology,
-                                       op->get_input_tensor_name(0),
-                                       op->get_input_shape(0),
-                                       op->get_input_element_type(0),
-                                       op->get_input_tensor_name(1),
-                                       op->get_input_shape(1),
-                                       op->get_output_tensor_name(0),
-                                       op->get_output_shape(0),
-                                       op->get_output_element_type(0),
-                                       operation);
+    kern.emit<OP>(static_pointer_cast<OP>(op));
 }
 
 // This function needed to only change the name of the data in topology
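The hunk above replaces the string-based helper with a template that forwards to kern.emit<OP>(). A stand-alone sketch of how that kind of tag dispatch resolves to per-op build_krnl overloads (hypothetical minimal types, not the real CustomKernels class):

// Illustrative sketch only: emit<OP>() casts the generic node pointer and lets
// ordinary overload resolution pick the builder for that op type at compile time.
#include <iostream>
#include <memory>

struct Node { virtual ~Node() = default; };
struct Greater : Node {};
struct LessEq : Node {};

struct Kernels
{
    void build_krnl(const std::shared_ptr<Greater>&) { std::cout << "build ' > ' kernel\n"; }
    void build_krnl(const std::shared_ptr<LessEq>&) { std::cout << "build ' <= ' kernel\n"; }

    template <typename OP>
    void emit(const std::shared_ptr<OP>& op)
    {
        build_krnl(op); // overload resolution selects the matching builder
    }
};

template <typename OP>
static void do_logical_operation(Kernels& kern, const std::shared_ptr<Node>& op)
{
    kern.emit<OP>(std::static_pointer_cast<OP>(op));
}

int main()
{
    Kernels kern;
    std::shared_ptr<Node> n = std::make_shared<Greater>();
    do_logical_operation<Greater>(kern, n); // prints: build ' > ' kernel
}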
@@ -1246,42 +1231,42 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Greater:
         {
-            do_logical_operation(topology, op, " > ");
+            do_logical_operation<op::Greater>(kern, op);
             break;
         }
         case OP_TYPEID::GreaterEq:
         {
-            do_logical_operation(topology, op, " >= ");
+            do_logical_operation<op::GreaterEq>(kern, op);
             break;
         }
         case OP_TYPEID::Equal:
         {
-            do_logical_operation(topology, op, " == ");
+            do_logical_operation<op::Equal>(kern, op);
             break;
         }
         case OP_TYPEID::NotEqual:
         {
-            do_logical_operation(topology, op, " != ");
+            do_logical_operation<op::NotEqual>(kern, op);
             break;
         }
         case OP_TYPEID::Less:
         {
-            do_logical_operation(topology, op, " < ");
+            do_logical_operation<op::Less>(kern, op);
             break;
         }
         case OP_TYPEID::LessEq:
         {
-            do_logical_operation(topology, op, " <= ");
+            do_logical_operation<op::LessEq>(kern, op);
             break;
         }
         case OP_TYPEID::And:
         {
-            do_logical_operation(topology, op, " && ");
+            do_logical_operation<op::And>(kern, op);
             break;
         }
         case OP_TYPEID::Or:
         {
-            do_logical_operation(topology, op, " || ");
+            do_logical_operation<op::Or>(kern, op);
             break;
         }
         case OP_TYPEID::Pad:
@@ -1305,40 +1290,8 @@ shared_ptr<runtime::Executable>
         {
             arguments_check(op, 6, 3);
 
-            const shared_ptr<op::BatchNormTrainingBackprop> batch_norm =
-                static_pointer_cast<op::BatchNormTrainingBackprop>(op);
-            const double eps = batch_norm->get_eps_value();
-
-            do_create_mean(topology,
-                           op->get_output_tensor_name(2), // d_beta
-                           op->get_output_element_type(2),
-                           op->get_input_tensor_name(5), // delta
-                           op->get_input_shape(5),
-                           true);
-
-            do_create_variance_back(topology,
-                                    op->get_output_tensor_name(1), // d_gamma
-                                    op->get_output_element_type(1),
-                                    eps,
-                                    op->get_input_tensor_name(2), // input
-                                    op->get_input_shape(2),
-                                    op->get_input_tensor_name(3),  // gamma
-                                    op->get_input_tensor_name(4),  // beta
-                                    op->get_input_tensor_name(5)); // delta
-
-            do_batch_norm_backprop_operation(topology,
-                                             op->get_input_shape(2),
-                                             op->get_input_element_type(2),
-                                             op->get_input_tensor_name(0),
-                                             op->get_input_tensor_name(1),
-                                             op->get_input_tensor_name(2),
-                                             op->get_input_tensor_name(3),
-                                             op->get_input_tensor_name(4),
-                                             op->get_input_tensor_name(5),
-                                             eps,
-                                             op->get_output_tensor_name(0),
-                                             op->get_output_tensor_name(1),
-                                             op->get_output_tensor_name(2));
+            kern.emit<op::BatchNormTrainingBackprop>(
+                static_pointer_cast<op::BatchNormTrainingBackprop>(op));
             break;
         }
         case OP_TYPEID::BatchNormInference:
@@ -1367,16 +1320,7 @@ shared_ptr<runtime::Executable>
             if (proceed_with_custom_kernel || (op->get_input_shape(2).size() != 4) ||
                 (op->get_input_element_type(0) != ngraph::element::f32))
             {
-                do_batch_norm_operation(topology,
-                                        op->get_output_tensor_name(0),
-                                        op->get_output_element_type(0),
-                                        eps,
-                                        op->get_input_tensor_name(2),
-                                        op->get_input_shape(2),
-                                        op->get_input_tensor_name(0),
-                                        op->get_input_tensor_name(1),
-                                        op->get_input_tensor_name(3),
-                                        op->get_input_tensor_name(4));
+                kern.emit<op::BatchNormInference>(bnorm);
             }
             else
             {
@@ -1400,61 +1344,7 @@ shared_ptr<runtime::Executable>
             if ((op->get_input_shape(2).size() != 4) ||
                 (op->get_input_element_type(0) != ngraph::element::f32))
             {
-                string mean_name;
-                string variance_name;
-
-                if (op->get_inputs().size() < 3 || op->get_outputs().empty())
-                {
-                    arguments_check(op, 3, 1); // throw exception in this case
-                }
-
-                if (op->get_outputs().size() == 3)
-                {
-                    arguments_check(op, 3, 3);
-
-                    mean_name = op->get_output_tensor_name(1);
-                    variance_name = op->get_output_tensor_name(2);
-
-                    do_create_mean(topology,
-                                   mean_name,
-                                   op->get_output_element_type(0),
-                                   op->get_input_tensor_name(2),
-                                   op->get_input_shape(2),
-                                   false);
-
-                    do_create_variance(topology,
-                                       variance_name,
-                                       op->get_output_element_type(0),
-                                       op->get_input_tensor_name(2),
-                                       op->get_input_shape(2),
-                                       mean_name);
-                }
-
-                if (op->get_outputs().size() == 1 || op->get_outputs().size() == 3)
-                {
-                    if (mean_name.empty() || variance_name.empty())
-                    {
-                        arguments_check(op, 5, 1);
-
-                        mean_name = op->get_input_tensor_name(3);
-                        variance_name = op->get_input_tensor_name(4);
-                    }
-
-                    do_batch_norm_operation(topology,
-                                            op->get_output_tensor_name(0),
-                                            op->get_output_element_type(0),
-                                            eps,
-                                            op->get_input_tensor_name(2),
-                                            op->get_input_shape(2),
-                                            op->get_input_tensor_name(0),
-                                            op->get_input_tensor_name(1),
-                                            mean_name,
-                                            variance_name);
-                }
-                else
-                {
-                    arguments_check(op, 5, 1); // throw exception in this case
-                }
+                kern.emit<op::BatchNormTraining>(bnorm);
             }
             else
             {
src/ngraph/runtime/intelgpu/intelgpu_kernels.cpp
@@ -44,3 +44,15 @@ void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
         stream.add(kernel_item);
     }
 }
+
+void runtime::intelgpu::arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
+{
+    if (op->get_input_size() != input || op->get_output_size() != output)
+    {
+        ostringstream os;
+        os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
+           << " Expected input size=" << input << ", provided=" << op->get_input_size()
+           << ". Expected output size=" << output << ", provided=" << op->get_output_size();
+        throw invalid_argument(os.str());
+    }
+}
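arguments_check thus becomes a shared helper in runtime::intelgpu instead of a file-local static in intelgpu_backend.cpp. A minimal stand-alone sketch of the same validation idea, using a hypothetical Op struct rather than ngraph::Node:

// Illustrative only: validate expected input/output counts and throw
// std::invalid_argument with a message built the same way as in the diff above.
#include <sstream>
#include <stdexcept>
#include <string>

struct Op
{
    std::string description; // stands in for Node::description()
    size_t input_size;       // stands in for Node::get_input_size()
    size_t output_size;      // stands in for Node::get_output_size()
};

void arguments_check(const Op& op, size_t input, size_t output)
{
    if (op.input_size != input || op.output_size != output)
    {
        std::ostringstream os;
        os << "Operation \"" << op.description << "\" input and output sizes mismatch."
           << " Expected input size=" << input << ", provided=" << op.input_size
           << ". Expected output size=" << output << ", provided=" << op.output_size;
        throw std::invalid_argument(os.str());
    }
}

int main()
{
    arguments_check({"BatchNormInference", 5, 1}, 5, 1); // passes silently
    // arguments_check({"BatchNormInference", 3, 1}, 5, 1); // would throw
}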
src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp
@@ -24,11 +24,20 @@
 #include "ngraph/node.hpp"
 #include "ngraph/op/all.hpp"
+#include "ngraph/op/and.hpp"
 #include "ngraph/op/any.hpp"
+#include "ngraph/op/batch_norm.hpp"
 #include "ngraph/op/broadcast.hpp"
 #include "ngraph/op/convolution.hpp"
+#include "ngraph/op/equal.hpp"
+#include "ngraph/op/greater.hpp"
+#include "ngraph/op/greater_eq.hpp"
+#include "ngraph/op/less.hpp"
+#include "ngraph/op/less_eq.hpp"
 #include "ngraph/op/max.hpp"
 #include "ngraph/op/min.hpp"
+#include "ngraph/op/not_equal.hpp"
+#include "ngraph/op/or.hpp"
 #include "ngraph/op/product.hpp"
 #include "ngraph/op/select.hpp"
 #include "ngraph/op/slice.hpp"
@@ -43,6 +52,8 @@ namespace ngraph
         {
             class CustomKernelInfo;
             class CustomKernels;
+
+            void arguments_check(const std::shared_ptr<Node>& op, size_t input, size_t output);
         }
     }
 }
@@ -107,13 +118,24 @@ private:
     void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
 
     krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::And>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::BatchNormInference>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::BatchNormTraining>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::BatchNormTrainingBackprop>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Less>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::LessEq>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::NotEqual>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Or>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.cpp
@@ -14,21 +14,15 @@
 // limitations under the License.
 //*****************************************************************************
 
-#include <CPP/batch_norm.hpp>
-#include <CPP/concatenation.hpp>
-#include <CPP/custom_gpu_primitive.hpp>
-#include <CPP/scale.hpp>
-#include <CPP/split.hpp>
-
 #include "ngraph/code_writer.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
+#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
 
 #include "ngraph/op/batch_norm.hpp"
 
 using namespace std;
 using namespace ngraph;
+using namespace ngraph::runtime::intelgpu;
 
 // According to the documentation, input data channel is always being axis 1
 // Assumed the second dimension from the left. Example {0, 1, 0, 0} or {0, 1}
@@ -39,9 +33,8 @@ static Shape get_channel_shape(const Shape& shape, const string& function_name)
 {
     if (shape.size() < channel_axis + 1)
     {
         const string err = "intelgpu::" + function_name + "() input_shape" +
-                           runtime::intelgpu::array_dims(shape) + " should be at least " +
+                           array_dims(shape) + " should be at least " +
                            to_string(channel_axis + 1) + "D.";
         throw invalid_argument(err);
     }
@@ -53,15 +46,14 @@ static size_t get_idx_size(const Shape& shape, size_t pos)
     return accumulate(shape.cbegin() + pos, shape.cend(), 1, multiplies<size_t>());
 }
 
-void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
-                                       const string& output_name,
-                                       const element::Type& output_type,
-                                       const string& input_name,
-                                       const Shape& input_shape,
-                                       bool backward)
+// This creates mean of the input matrix by Channel axis
+static CustomKernels::krnl_info do_create_mean(const string& output_name,
+                                               const element::Type& output_type,
+                                               const string& input_name,
+                                               const Shape& input_shape,
+                                               bool backward)
 {
     const Shape channel_shape = get_channel_shape(input_shape, "create_mean");
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_mean_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
     const string kernel_data_type = get_opencl_type_name(output_type);
@@ -118,26 +110,23 @@ void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
     } // Main function body
     writer.block_end();
 
-    const cldnn::custom_gpu_primitive op_mean(output_name,
-                                              {input_name},
-                                              {writer.get_code()},
-                                              entry_point_name,
-                                              get_kernel_args(1, 1),
-                                              "",
-                                              layout,
-                                              {1});
-    topology.add(op_mean);
+    const CustomKernelInfo op_bcast_sum(output_name,
+                                        channel_shape,
+                                        output_type,
+                                        {input_name},
+                                        {writer.get_code()},
+                                        entry_point_name);
+    return {op_bcast_sum};
 }
 
-void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
-                                           const string& output_name,
-                                           const element::Type& output_type,
-                                           const string& input_name,
-                                           const Shape& input_shape,
-                                           const std::string& mean_name)
+// This creates variance of the input matrix by Channel axis
+static CustomKernels::krnl_info do_create_variance(const string& output_name,
+                                                   const element::Type& output_type,
+                                                   const string& input_name,
+                                                   const Shape& input_shape,
+                                                   const std::string& mean_name)
 {
     const Shape channel_shape = get_channel_shape(input_shape, "create_variance");
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
     const string kernel_data_type = get_opencl_type_name(output_type);
@@ -194,30 +183,26 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
     } // Main function body
     writer.block_end();
 
-    const cldnn::custom_gpu_primitive op_variance(output_name,
-                                                  {input_name, mean_name},
-                                                  {writer.get_code()},
-                                                  entry_point_name,
-                                                  get_kernel_args(2, 1),
-                                                  "",
-                                                  layout,
-                                                  {1});
-    topology.add(op_variance);
+    const CustomKernelInfo op_variance(output_name,
+                                       channel_shape,
+                                       output_type,
+                                       {input_name, mean_name},
+                                       {writer.get_code()},
+                                       entry_point_name);
+    return {op_variance};
 }
 
-void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
-                                                const string& output_name,
-                                                const element::Type& output_type,
-                                                double eps,
-                                                const string& input_name,
-                                                const Shape& input_shape,
-                                                const string& gamma_name,
-                                                const string& beta_name,
-                                                const string& mean_name_inp,
-                                                const string& variance_name_inp)
+static CustomKernels::krnl_info do_batch_norm_operation(const string& output_name,
+                                                        const element::Type& output_type,
+                                                        double eps,
+                                                        const string& input_name,
+                                                        const Shape& input_shape,
+                                                        const string& gamma_name,
+                                                        const string& beta_name,
+                                                        const string& mean_name_inp,
+                                                        const string& variance_name_inp)
 {
     const Shape channel_shape = get_channel_shape(input_shape, "batch_norm");
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, input_shape);
     const vector<size_t> gws(input_shape.begin(), input_shape.begin() + 2);
     const string entry_point_name = "batch_norm_" + output_name;
     const string kernel_data_type = get_opencl_type_name(output_type);
@@ -265,32 +250,30 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
     } // Main function body
     writer.block_end();
 
-    const vector<cldnn::primitive_id>& inputs = {
+    const vector<string>& inputs = {
         input_name, gamma_name, beta_name, mean_name_inp, variance_name_inp};
-    const cldnn::custom_gpu_primitive op_batch_norm(output_name,
-                                                    inputs,
-                                                    {writer.get_code()},
-                                                    entry_point_name,
-                                                    get_kernel_args(5, 1),
-                                                    "",
-                                                    layout,
-                                                    gws,
-                                                    {1, 1, 1});
-    topology.add(op_batch_norm);
+    const CustomKernelInfo op_batch_norm(output_name,
+                                         input_shape,
+                                         output_type,
+                                         inputs,
+                                         {writer.get_code()},
+                                         entry_point_name,
+                                         gws,
+                                         {1, 1, 1});
+    return {op_batch_norm};
 }
 
-void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
-                                                const string& output_name,
-                                                const element::Type& output_type,
-                                                double eps,
-                                                const string& input_name,
-                                                const Shape& input_shape,
-                                                const string& mean_name,
-                                                const string& variance_name,
-                                                const string& delta_name)
+// This creates variance backprop of the input matrix by Channel axis
+static CustomKernels::krnl_info do_create_variance_back(const string& output_name,
+                                                        const element::Type& output_type,
+                                                        double eps,
+                                                        const string& input_name,
+                                                        const Shape& input_shape,
+                                                        const string& mean_name,
+                                                        const string& variance_name,
+                                                        const string& delta_name)
 {
     const Shape channel_shape = get_channel_shape(input_shape, "create_variance_back");
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_back_" + output_name;
     const string kernel_data_type = get_opencl_type_name(output_type);
 
     CodeWriter writer;
@@ -343,34 +326,34 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
     } // Main function body
     writer.block_end();
 
-    const vector<cldnn::primitive_id>& inputs = {input_name, delta_name, mean_name, variance_name};
-    const cldnn::custom_gpu_primitive op_create_variance_back(output_name,
-                                                              inputs,
-                                                              {writer.get_code()},
-                                                              entry_point_name,
-                                                              get_kernel_args(4, 1),
-                                                              "",
-                                                              layout,
-                                                              gws);
-    topology.add(op_create_variance_back);
+    const vector<string>& inputs = {input_name, delta_name, mean_name, variance_name};
+    const CustomKernelInfo op_create_variance_back(output_name,
+                                                   channel_shape,
+                                                   output_type,
+                                                   inputs,
+                                                   {writer.get_code()},
+                                                   entry_point_name,
+                                                   gws);
+    return {op_create_variance_back};
 }
 
-void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topology,
-                                                         const Shape& shape,
-                                                         const element::Type& type,
-                                                         const string& gamma_name,
-                                                         const string& beta_name,
-                                                         const string& input_name,
-                                                         const string& mean_name,
-                                                         const string& variance_name,
-                                                         const string& delta_name,
-                                                         double eps,
-                                                         const string& output_name,
-                                                         const string& output_gamma_name,
-                                                         const string& output_beta_name)
+// This function uses "shape" parameter as input or output Shape
+// Shape of all other calculated as first axis from the left
+// Example: output[ 4, 3, 2, 8 ] means out_gamma[ 3 ]
+static CustomKernels::krnl_info do_batch_norm_backprop_operation(const Shape& shape,
+                                                                 const element::Type& type,
+                                                                 const string& gamma_name,
+                                                                 const string& beta_name,
+                                                                 const string& input_name,
+                                                                 const string& mean_name,
+                                                                 const string& variance_name,
+                                                                 const string& delta_name,
+                                                                 double eps,
+                                                                 const string& output_name,
+                                                                 const string& output_gamma_name,
+                                                                 const string& output_beta_name)
 {
     const Shape channel_shape = get_channel_shape(shape, "batch_norm_backprop");
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(type, shape);
     const string entry_point_name = "batch_norm_backprop_" + output_name;
     const size_t r_axes_size = shape_size(shape) / shape_size(channel_shape);
     const string kernel_data_type = get_opencl_type_name(type);
@@ -391,7 +374,7 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
     { // Main function body
         // Main loops
-        gws = runtime::intelgpu::generate_loops(writer, shape, true);
+        gws = generate_loops(writer, shape, true);
 
         writer << kernel_data_type << " stddev = sqrt(variance[i" << channel_axis << "] + " << eps
                << ");\n";
@@ -404,25 +387,139 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
                << channel_axis << "]) / " << r_axes_size << ");\n";
 
         // Closing brackets for main loops
-        runtime::intelgpu::generate_loops(writer, shape, false);
+        generate_loops(writer, shape, false);
     } // Main function body
     writer.block_end();
 
-    const vector<cldnn::primitive_id>& inputs = {input_name,
+    const vector<string>& inputs = {input_name,
                                     delta_name,
                                     mean_name,
                                     variance_name,
                                     gamma_name,
                                     output_gamma_name,
                                     output_beta_name};
-    const cldnn::custom_gpu_primitive op_batch_norm_backprop(output_name,
-                                                             inputs,
-                                                             {writer.get_code()},
-                                                             entry_point_name,
-                                                             get_kernel_args(7, 1),
-                                                             "",
-                                                             layout,
-                                                             gws);
-    topology.add(op_batch_norm_backprop);
+    const CustomKernelInfo op_batch_norm_backprop(
+        output_name, shape, type, inputs, {writer.get_code()}, entry_point_name, gws);
+    return {op_batch_norm_backprop};
+}
+
+CustomKernels::krnl_info
+    CustomKernels::build_krnl(const shared_ptr<op::BatchNormInference>& op) const
+{
+    return do_batch_norm_operation(op->get_output_tensor_name(0),
+                                   op->get_output_element_type(0),
+                                   op->get_eps_value(),
+                                   op->get_input_tensor_name(2),
+                                   op->get_input_shape(2),
+                                   op->get_input_tensor_name(0),
+                                   op->get_input_tensor_name(1),
+                                   op->get_input_tensor_name(3),
+                                   op->get_input_tensor_name(4));
+}
+
+CustomKernels::krnl_info
+    CustomKernels::build_krnl(const shared_ptr<op::BatchNormTraining>& op) const
+{
+    CustomKernels::krnl_info result;
+    string mean_name;
+    string variance_name;
+
+    if (op->get_inputs().size() < 3 || op->get_outputs().empty())
+    {
+        arguments_check(op, 3, 1); // throw exception in this case
+    }
+
+    if (op->get_outputs().size() == 3)
+    {
+        arguments_check(op, 3, 3);
+
+        mean_name = op->get_output_tensor_name(1);
+        variance_name = op->get_output_tensor_name(2);
+
+        CustomKernels::krnl_info mean = do_create_mean(mean_name,
+                                                       op->get_output_element_type(0),
+                                                       op->get_input_tensor_name(2),
+                                                       op->get_input_shape(2),
+                                                       false);
+        result.insert(result.end(), mean.begin(), mean.end());
+
+        CustomKernels::krnl_info variance = do_create_variance(variance_name,
+                                                               op->get_output_element_type(0),
+                                                               op->get_input_tensor_name(2),
+                                                               op->get_input_shape(2),
+                                                               mean_name);
+        result.insert(result.end(), variance.begin(), variance.end());
+    }
+
+    if (op->get_outputs().size() == 1 || op->get_outputs().size() == 3)
+    {
+        if (mean_name.empty() || variance_name.empty())
+        {
+            arguments_check(op, 5, 1);
+
+            mean_name = op->get_input_tensor_name(3);
+            variance_name = op->get_input_tensor_name(4);
+        }
+
+        CustomKernels::krnl_info batch_norm =
+            do_batch_norm_operation(op->get_output_tensor_name(0),
+                                    op->get_output_element_type(0),
+                                    op->get_eps_value(),
+                                    op->get_input_tensor_name(2),
+                                    op->get_input_shape(2),
+                                    op->get_input_tensor_name(0),
+                                    op->get_input_tensor_name(1),
+                                    mean_name,
+                                    variance_name);
+        result.insert(result.end(), batch_norm.begin(), batch_norm.end());
+    }
+    else
+    {
+        arguments_check(op, 5, 1); // throw exception in this case
+    }
+
+    return result;
+}
+
+CustomKernels::krnl_info
+    CustomKernels::build_krnl(const shared_ptr<op::BatchNormTrainingBackprop>& op) const
+{
+    CustomKernels::krnl_info result;
+
+    CustomKernels::krnl_info mean = do_create_mean(op->get_output_tensor_name(2), // d_beta
+                                                   op->get_output_element_type(2),
+                                                   op->get_input_tensor_name(5), // delta
+                                                   op->get_input_shape(5),
+                                                   true);
+    result.insert(result.end(), mean.begin(), mean.end());
+
+    CustomKernels::krnl_info variance =
+        do_create_variance_back(op->get_output_tensor_name(1), // d_gamma
+                                op->get_output_element_type(1),
+                                op->get_eps_value(),
+                                op->get_input_tensor_name(2), // input
+                                op->get_input_shape(2),
+                                op->get_input_tensor_name(3),  // gamma
+                                op->get_input_tensor_name(4),  // beta
+                                op->get_input_tensor_name(5)); // delta
+    result.insert(result.end(), variance.begin(), variance.end());
+
+    CustomKernels::krnl_info batch_norm =
+        do_batch_norm_backprop_operation(op->get_input_shape(2),
+                                         op->get_input_element_type(2),
+                                         op->get_input_tensor_name(0),
+                                         op->get_input_tensor_name(1),
+                                         op->get_input_tensor_name(2),
+                                         op->get_input_tensor_name(3),
+                                         op->get_input_tensor_name(4),
+                                         op->get_input_tensor_name(5),
+                                         op->get_eps_value(),
+                                         op->get_output_tensor_name(0),
+                                         op->get_output_tensor_name(1),
+                                         op->get_output_tensor_name(2));
+    result.insert(result.end(), batch_norm.begin(), batch_norm.end());
+
+    return result;
 }
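For reference, the custom kernels built above generate OpenCL that applies standard batch normalization along the channel axis (axis 1, as the comment in this file states). Below is a host-side C++ sketch of the math the generated kernel implements; it is illustrative reference code under that assumption, not the emitted kernel itself.

// Reference computation for an N x C x spatial layout with channels on axis 1.
#include <cmath>
#include <cstddef>
#include <vector>

void batch_norm_reference(const std::vector<float>& input,  // size N*C*spatial
                          const std::vector<float>& gamma,  // per-channel scale, size C
                          const std::vector<float>& beta,   // per-channel shift, size C
                          const std::vector<float>& mean,   // per-channel mean, size C
                          const std::vector<float>& var,    // per-channel variance, size C
                          double eps,
                          std::size_t N, std::size_t C, std::size_t spatial,
                          std::vector<float>& output)
{
    output.resize(input.size());
    for (std::size_t n = 0; n < N; ++n)
    {
        for (std::size_t c = 0; c < C; ++c)
        {
            // stddev per channel, matching "sqrt(variance[i1] + eps)" in the generated code
            const float inv_stddev = 1.0f / std::sqrt(var[c] + static_cast<float>(eps));
            for (std::size_t s = 0; s < spatial; ++s)
            {
                const std::size_t i = (n * C + c) * spatial + s;
                output[i] = gamma[c] * (input[i] - mean[c]) * inv_stddev + beta[c];
            }
        }
    }
}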
src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp
(deleted, 100644 → 0)
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            // This implements BatchNorm nGraph operation
            // nGraph uses channels in this operation but clDNN uses full input data
            void do_batch_norm_operation(cldnn::topology& topology,
                                         const std::string& output_name,
                                         const element::Type& output_type,
                                         double eps,
                                         const std::string& input_name,
                                         const Shape& input_shape,
                                         const std::string& gamma_name,
                                         const std::string& beta_name,
                                         const std::string& mean_name,
                                         const std::string& variance_name);

            // This creates mean of the input matrix by Channel axis
            void do_create_mean(cldnn::topology& topology,
                                const std::string& output_name,
                                const element::Type& output_type,
                                const std::string& input_name,
                                const Shape& input_shape,
                                bool backward);

            // This creates variance of the input matrix by Channel axis
            void do_create_variance(cldnn::topology& topology,
                                    const std::string& output_name,
                                    const element::Type& output_type,
                                    const std::string& input_name,
                                    const Shape& input_shape,
                                    const std::string& mean_name);

            // This creates variance backprop of the input matrix by Channel axis
            void do_create_variance_back(cldnn::topology& topology,
                                         const std::string& output_name,
                                         const element::Type& output_type,
                                         double eps,
                                         const std::string& input_name,
                                         const Shape& input_shape,
                                         const std::string& mean_name,
                                         const std::string& variance_name,
                                         const std::string& delta_name);

            // This function uses "shape" parameter as input or output Shape
            // Shape of all other calculated as first axis from the left
            // Example: output[ 4, 3, 2, 8 ] means out_gamma[ 3 ]
            void do_batch_norm_backprop_operation(cldnn::topology& topology,
                                                  const Shape& shape,
                                                  const element::Type& type,
                                                  const std::string& gamma_name,
                                                  const std::string& beta_name,
                                                  const std::string& input_name,
                                                  const std::string& mean_name,
                                                  const std::string& variance_name,
                                                  const std::string& delta_name,
                                                  double eps,
                                                  const std::string& output_name,
                                                  const std::string& output_gamma_name,
                                                  const std::string& output_beta_name);
        }
    }
}
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
@@ -1277,18 +1277,16 @@ CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Select>&
     return {krn_ret};
 }
 
-void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
-                                        const string& input0_name,
-                                        const Shape& input0_shape,
-                                        const element::Type& input0_type,
-                                        const string& input1_name,
-                                        const Shape& input1_shape,
-                                        const string& output_name,
-                                        const Shape& output_shape,
-                                        const element::Type& output_type,
-                                        const string& operation)
+static CustomKernels::krnl_info do_logic_kernel(const shared_ptr<Node>& op, const string& operation)
 {
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string& input0_name = op->get_input_tensor_name(0);
+    const Shape& input0_shape = op->get_input_shape(0);
+    const element::Type& input0_type = op->get_input_element_type(0);
+    const string& input1_name = op->get_input_tensor_name(1);
+    const Shape& input1_shape = op->get_input_shape(1);
+    const string& output_name = op->get_output_tensor_name(0);
+    const Shape& output_shape = op->get_output_shape(0);
+    const element::Type& output_type = op->get_output_element_type(0);
     const string entry_point_name = "logic_" + output_name;
     CodeWriter writer;
     vector<size_t> gws;
@@ -1313,15 +1311,14 @@ void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
     }
     writer.block_end();
 
-    const cldnn::custom_gpu_primitive op_logical(output_name,
-                                                 {input0_name, input1_name},
-                                                 {writer.get_code()},
-                                                 entry_point_name,
-                                                 get_kernel_args(2, 1),
-                                                 "",
-                                                 layout,
-                                                 gws);
-    topology.add(op_logical);
+    const CustomKernelInfo op_logical(output_name,
+                                      output_shape,
+                                      output_type,
+                                      {input0_name, input1_name},
+                                      {writer.get_code()},
+                                      entry_point_name,
+                                      gws);
+    return {op_logical};
 }
 
 void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
@@ -2333,3 +2330,43 @@ size_t runtime::intelgpu::get_max_memory_rss()
 
     return result;
 }
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::And>& op) const
+{
+    return do_logic_kernel(op, " && ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Equal>& op) const
+{
+    return do_logic_kernel(op, " == ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Greater>& op) const
+{
+    return do_logic_kernel(op, " > ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::GreaterEq>& op) const
+{
+    return do_logic_kernel(op, " >= ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Less>& op) const
+{
+    return do_logic_kernel(op, " < ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::LessEq>& op) const
+{
+    return do_logic_kernel(op, " <= ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::NotEqual>& op) const
+{
+    return do_logic_kernel(op, " != ");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Or>& op) const
+{
+    return do_logic_kernel(op, " || ");
+}
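Each comparison and logical op thus reduces to a one-line build_krnl that hands its operator token (" > ", " && ", and so on) to do_logic_kernel, which splices the token into the generated element-wise OpenCL source. A rough sketch of how such a kernel string could be assembled; this is an illustrative guess at the shape of the generated source, not the exact text nGraph emits.

// Illustrative kernel-source builder: the operator token is pasted between the
// two input loads, which is why one builder serves Greater, And, LessEq, etc.
#include <iostream>
#include <sstream>
#include <string>

std::string make_logic_kernel_source(const std::string& entry_point, const std::string& operation)
{
    std::ostringstream code;
    code << "__kernel void " << entry_point << "(const __global float* input0,\n"
         << "                                    const __global float* input1,\n"
         << "                                    __global char* output)\n"
         << "{\n"
         << "    const uint i = get_global_id(0);\n"
         << "    output[i] = input0[i]" << operation << "input1[i];\n"
         << "}\n";
    return code.str();
}

int main()
{
    // The same builder works for every operator token listed above.
    std::cout << make_logic_kernel_source("logic_output0", " > ");
}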
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
@@ -100,17 +100,6 @@ namespace ngraph
                                    const element::Type& output_type,
                                    size_t concat_axis);
 
-            void do_logic_kernel(cldnn::topology& topology,
-                                 const std::string& input0_name,
-                                 const Shape& input0_shape,
-                                 const element::Type& input0_type,
-                                 const std::string& input1_name,
-                                 const Shape& input1_shape,
-                                 const std::string& output_name,
-                                 const Shape& output_shape,
-                                 const element::Type& output_type,
-                                 const std::string& operation);
-
             void do_eltwise_kernel(cldnn::topology& topology,
                                    const std::string& input0_name,
                                    const Shape& input0_shape,