Commit b466027e
Authored Mar 29, 2019 by Diego Caballero; committed by Scott Cyphers, Mar 29, 2019
[CPU] Fix ambiguous 'op' namespace. (#2683)
Parent: 105f03bc
Showing 8 changed files with 464 additions and 393 deletions.
src/ngraph/runtime/cpu/builder/bounded_relu.cpp           +1   -1
src/ngraph/runtime/cpu/builder/leaky_relu.cpp             +1   -1
src/ngraph/runtime/cpu/builder/quantization.cpp           +1   -1
src/ngraph/runtime/cpu/mkldnn_utils.cpp                   +1   -1
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp            +29  -27
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp                +320 -266
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.cpp   +4   -4
src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp            +107 -92
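The diffs below are mechanical but broad: inside the deeply nested CPU-backend namespaces, an unqualified op::Foo can resolve to a sibling op namespace (the backend has its own, and the pattern matcher has pattern::op) rather than to ngraph::op, so the commit fully qualifies every such reference. A minimal, self-contained C++ sketch of the collision being fixed — the namespace layout here is illustrative, not nGraph's real one:

#include <iostream>

namespace ngraph
{
    namespace op
    {
        struct Relu { const char* where() const { return "ngraph::op"; } };
    }
    namespace runtime
    {
        namespace cpu
        {
            namespace op // a sibling 'op', like the CPU backend's own op namespace
            {
                struct Relu { const char* where() const { return "runtime::cpu::op"; } };
            }

            void example()
            {
                op::Relu a;         // unqualified: picks up the *inner* op namespace
                ngraph::op::Relu b; // fully qualified: unambiguous, as in this commit
                std::cout << a.where() << " vs " << b.where() << "\n";
            }
        }
    }
}

int main()
{
    ngraph::runtime::cpu::example(); // prints: runtime::cpu::op vs ngraph::op
}

Qualifying the outer namespace, as every hunk below does, makes name lookup independent of whichever sibling op namespaces happen to be in scope.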
src/ngraph/runtime/cpu/builder/bounded_relu.cpp

@@ -38,7 +38,7 @@ namespace ngraph
    auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
    size_t count = out[0].get_size();
-   auto alpha = static_cast<const op::BoundedRelu*>(node)->get_alpha();
+   auto alpha = static_cast<const ngraph::op::BoundedRelu*>(node)->get_alpha();
    if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
src/ngraph/runtime/cpu/builder/leaky_relu.cpp

@@ -38,7 +38,7 @@ namespace ngraph
    auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
    size_t count = out[0].get_size();
-   auto alpha = static_cast<const op::LeakyRelu*>(node)->get_alpha();
+   auto alpha = static_cast<const ngraph::op::LeakyRelu*>(node)->get_alpha();
    if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
src/ngraph/runtime/cpu/builder/quantization.cpp

@@ -313,7 +313,7 @@ namespace ngraph
    auto arg0_shape = args[0].get_shape();
    auto arg1_shape = args[1].get_shape();
    auto daxes = quantize->get_axes();
-   op::Quantize::RoundMode round_mode = quantize->get_round_mode();
+   ngraph::op::Quantize::RoundMode round_mode = quantize->get_round_mode();
    if (args[0].get_element_type() == element::f32)
    {
src/ngraph/runtime/cpu/mkldnn_utils.cpp

@@ -705,7 +705,7 @@ bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node)
void runtime::cpu::mkldnn_utils::assign_mkldnn_kernel(Node* node)
{
-   auto ngraph_op = static_cast<op::Op*>(node);
+   auto ngraph_op = static_cast<ngraph::op::Op*>(node);
    auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    op_annotations->set_mkldnn_op(true);
    ngraph_op->set_op_annotations(op_annotations);
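Besides the namespace fix, this hunk shows the annotation idiom the CPU backend uses throughout the passes below: the pass creates a CPUOpAnnotations object, sets its MKL-DNN flag, and attaches it to the node for later stages to query. A generic sketch of that idiom with hypothetical types (not the real nGraph API):

#include <iostream>
#include <memory>

// Backend-specific metadata hung off a graph node.
struct OpAnnotations
{
    bool mkldnn_op = false; // true once a DNN-library kernel has been assigned
};

struct Node
{
    std::shared_ptr<OpAnnotations> annotations;
};

// Mirrors the flow of assign_mkldnn_kernel above: build annotations,
// flag the node, and attach them.
void assign_dnn_kernel(Node* node)
{
    auto ann = std::make_shared<OpAnnotations>();
    ann->mkldnn_op = true;
    node->annotations = std::move(ann);
}

int main()
{
    Node n;
    assign_dnn_kernel(&n);
    std::cout << std::boolalpha << n.annotations->mkldnn_op << "\n"; // true
}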
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp

@@ -126,7 +126,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConcat)
    {
-       auto quantized_concat = static_cast<op::QuantizedConcat*>(node);
+       auto quantized_concat = static_cast<ngraph::op::QuantizedConcat*>(node);
        if ((node->get_input_element_type(0) == element::i8 ||
             node->get_input_element_type(0) == element::u8) &&

@@ -195,7 +195,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBiasAdd)
    {
-       auto convolution = static_cast<op::ConvolutionBiasAdd*>(node);
+       auto convolution = static_cast<ngraph::op::ConvolutionBiasAdd*>(node);
        if (mkldnn_utils::can_use_mkldnn_conv<ngraph::op::ConvolutionBiasAdd>(node))
        {

@@ -212,7 +212,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::GetOutputElement)
    {
-       auto goe = static_cast<op::GetOutputElement*>(node);
+       auto goe = static_cast<ngraph::op::GetOutputElement*>(node);
        auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
        op_annotations->add_in_place_oi_pair({0, goe->get_n(), false});

@@ -222,7 +222,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionAdd)
    {
-       auto convolution = static_cast<op::ConvolutionAdd*>(node);
+       auto convolution = static_cast<ngraph::op::ConvolutionAdd*>(node);
        if (mkldnn_utils::can_use_mkldnn_conv<ngraph::op::ConvolutionAdd>(node))
        {

@@ -257,7 +257,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBackpropData)
    {
-       auto convolution = static_cast<op::ConvolutionBackpropData*>(node);
+       auto convolution = static_cast<ngraph::op::ConvolutionBackpropData*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg1_shape = node->get_input_shape(1);

@@ -282,7 +282,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBackpropFilters)
    {
-       auto convolution = static_cast<op::ConvolutionBackpropFilters*>(node);
+       auto convolution = static_cast<ngraph::op::ConvolutionBackpropFilters*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg1_shape = node->get_input_shape(1);

@@ -316,7 +316,8 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBiasBackpropFiltersBias)
    {
-       auto convolution = static_cast<op::ConvolutionBiasBackpropFiltersBias*>(node);
+       auto convolution =
+           static_cast<ngraph::op::ConvolutionBiasBackpropFiltersBias*>(node);
        auto data_shape = node->get_input_shape(0);
        auto delta_shape = node->get_input_shape(1);

@@ -340,7 +341,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::AvgPool)
    {
-       auto avg_pool = static_cast<op::AvgPool*>(node);
+       auto avg_pool = static_cast<ngraph::op::AvgPool*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -357,7 +358,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::AvgPoolBackprop)
    {
-       auto avg_pool = static_cast<op::AvgPoolBackprop*>(node);
+       auto avg_pool = static_cast<ngraph::op::AvgPoolBackprop*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -374,7 +375,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPool)
    {
-       auto max_pool = static_cast<op::MaxPool*>(node);
+       auto max_pool = static_cast<ngraph::op::MaxPool*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -391,7 +392,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPoolWithIndices)
    {
-       auto max_pool = static_cast<op::MaxPoolWithIndices*>(node);
+       auto max_pool = static_cast<ngraph::op::MaxPoolWithIndices*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -407,7 +408,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPoolBackprop)
    {
-       auto max_pool = static_cast<op::MaxPoolBackprop*>(node);
+       auto max_pool = static_cast<ngraph::op::MaxPoolBackprop*>(node);
        auto arg1_shape = node->get_input_shape(1);
        auto arg1_rank = arg1_shape.size();

@@ -424,7 +425,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPoolWithIndicesBackprop)
    {
-       auto max_pool = static_cast<op::MaxPoolWithIndicesBackprop*>(node);
+       auto max_pool = static_cast<ngraph::op::MaxPoolWithIndicesBackprop*>(node);
        auto arg1_shape = node->get_input_shape(1);
        auto arg1_rank = arg1_shape.size();

@@ -440,7 +441,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Relu)
    {
-       auto relu = static_cast<op::Relu*>(node);
+       auto relu = static_cast<ngraph::op::Relu*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -464,7 +465,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::ReplaceSlice)
    {
-       auto replace_slice = static_cast<op::ReplaceSlice*>(node);
+       auto replace_slice = static_cast<ngraph::op::ReplaceSlice*>(node);
        // ReplaceSlice is independent of data type. Hence not checking type
        auto op_annotations =

@@ -480,7 +481,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::UpdateSlice)
    {
-       auto update_slice = static_cast<op::UpdateSlice*>(node);
+       auto update_slice = static_cast<ngraph::op::UpdateSlice*>(node);
        auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();

@@ -601,7 +602,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Softmax)
    {
-       auto softmax = static_cast<op::Softmax*>(node);
+       auto softmax = static_cast<ngraph::op::Softmax*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -618,7 +619,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Slice)
    {
-       auto slice = static_cast<op::Slice*>(node);
+       auto slice = static_cast<ngraph::op::Slice*>(node);
        auto strides = slice->get_strides();
        if (!is_strided(strides) && node->get_input_element_type(0) == element::f32)
        {

@@ -649,7 +650,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::BoundedRelu)
    {
-       auto bounded_relu = static_cast<op::BoundedRelu*>(node);
+       auto bounded_relu = static_cast<ngraph::op::BoundedRelu*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -673,7 +674,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::LeakyRelu)
    {
-       auto leaky_relu = static_cast<op::LeakyRelu*>(node);
+       auto leaky_relu = static_cast<ngraph::op::LeakyRelu*>(node);
        auto arg0_shape = node->get_input_shape(0);
        auto arg0_rank = arg0_shape.size();

@@ -719,7 +720,8 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
    {
-       auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBiasAdd*>(node);
+       auto quantized_conv_bias =
+           static_cast<ngraph::op::QuantizedConvolutionBiasAdd*>(node);
        auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
        op_annotations->set_mkldnn_op(true);

@@ -733,7 +735,7 @@ namespace ngraph
    void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBiasSignedAdd)
    {
        auto quantized_conv_bias =
-           static_cast<op::QuantizedConvolutionBiasSignedAdd*>(node);
+           static_cast<ngraph::op::QuantizedConvolutionBiasSignedAdd*>(node);
        auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
        op_annotations->set_mkldnn_op(true);

@@ -758,7 +760,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Dequantize)
    {
-       auto dequantize = static_cast<op::Dequantize*>(node);
+       auto dequantize = static_cast<ngraph::op::Dequantize*>(node);
        // TODO(nbpatel): Support dynamic offset via mkldnn
        // Go through reference if the offset is not a constant
        if (!dequantize->get_argument(2)->is_constant())

@@ -796,7 +798,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Quantize)
    {
-       auto quantize = static_cast<op::Quantize*>(node);
+       auto quantize = static_cast<ngraph::op::Quantize*>(node);
        // TODO(nbpatel): Support dynamic offset via mkldnn
        // Go through reference if the offset is not a constant
        if (!quantize->get_argument(2)->is_constant())

@@ -805,8 +807,8 @@ namespace ngraph
        }
        auto offset_const_op =
            std::static_pointer_cast<ngraph::op::Constant>(quantize->get_argument(2));
-       op::Quantize::RoundMode round_mode = quantize->get_round_mode();
-       if (round_mode != op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN)
+       ngraph::op::Quantize::RoundMode round_mode = quantize->get_round_mode();
+       if (round_mode != ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN)
        {
            return;
        }

@@ -845,7 +847,7 @@ namespace ngraph
    template <>
    void CPUAssignment::ASSIGN_DECL(ngraph::op::Convert)
    {
-       auto convert = static_cast<op::Convert*>(node);
+       auto convert = static_cast<ngraph::op::Convert*>(node);
        if ((node->get_input_element_type(0) == element::i8 &&
             node->get_output_element_type(0) == element::u8) ||
            (node->get_input_element_type(0) == element::u8 &&
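Every cpu_assignment.cpp hunk above has the same shape: a handler registered for one concrete op type receives a plain Node* and immediately static_casts it, which is safe because the dispatcher only invokes the handler for nodes of that type. A rough, self-contained sketch of such type-keyed dispatch (illustrative names, not the ASSIGN_DECL machinery itself):

#include <functional>
#include <iostream>
#include <typeindex>
#include <unordered_map>

struct Node
{
    virtual ~Node() = default;
};

struct MaxPool : Node
{
    int window = 3;
};

int main()
{
    // Handlers are keyed by the node's dynamic type, so each handler can
    // downcast with static_cast instead of a checked dynamic_cast.
    std::unordered_map<std::type_index, std::function<void(Node*)>> dispatch;
    dispatch[typeid(MaxPool)] = [](Node* node) {
        auto max_pool = static_cast<MaxPool*>(node);
        std::cout << "assign kernel, window = " << max_pool->window << "\n";
    };

    MaxPool mp;
    Node* n = &mp;
    dispatch.at(typeid(*n))(n); // look up by runtime type, then run the typed handler
}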
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp

@@ -144,9 +144,9 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmulbias()
    auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);
    auto b = std::make_shared<pattern::op::Label>(element::f32, shape_b);
-   auto pmmb = std::make_shared<op::MatmulBias>(
+   auto pmmb = std::make_shared<ngraph::op::MatmulBias>(
        W, x, nullptr, W->get_shape(), x->get_shape(), false, false);
-   auto pbroadcast = std::make_shared<op::Broadcast>(b, pmmb->get_shape(), AxisSet{0});
+   auto pbroadcast = std::make_shared<ngraph::op::Broadcast>(b, pmmb->get_shape(), AxisSet{0});
    auto padd = pmmb + pbroadcast;
    ngraph::pattern::graph_rewrite_callback callback = [W, x](pattern::Matcher& m) {

@@ -154,12 +154,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmulbias()
                     << m.get_match_root()->get_name();
        auto mpattern = m.get_match_root(); // add
-       auto m_matmul = ngraph::pattern::Matcher::unique_match<op::MatmulBias>(mpattern);
-       auto m_broadcast = ngraph::pattern::Matcher::unique_match<op::Broadcast>(mpattern);
+       auto m_matmul = ngraph::pattern::Matcher::unique_match<ngraph::op::MatmulBias>(mpattern);
+       auto m_broadcast = ngraph::pattern::Matcher::unique_match<ngraph::op::Broadcast>(mpattern);
        auto m_bias = m_broadcast->get_argument(0);
        auto pattern_map = m.get_pattern_map();
-       auto mmb = std::make_shared<op::MatmulBias>(pattern_map[W],
+       auto mmb = std::make_shared<ngraph::op::MatmulBias>(pattern_map[W],
            pattern_map[x],
            m_bias,
            m_matmul->get_a_shape(),

@@ -186,12 +186,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmul()
    auto W = std::make_shared<pattern::op::Label>(element::f32, shape_w);
    auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);
-   auto reshape_pred = pattern::has_class<op::Reshape>();
+   auto reshape_pred = pattern::has_class<ngraph::op::Reshape>();
    auto skip_w = std::make_shared<pattern::op::Skip>(W, reshape_pred);
    auto skip_x = std::make_shared<pattern::op::Skip>(x, reshape_pred);
-   auto pdot = std::make_shared<op::Dot>(skip_w, skip_x);
+   auto pdot = std::make_shared<ngraph::op::Dot>(skip_w, skip_x);
    ngraph::pattern::graph_rewrite_callback callback = [W, x](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In callback for construct_matmul_pattern against node = "

@@ -233,7 +233,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmul()
            return false;
        }
-       auto cg = std::shared_ptr<Node>(new op::MatmulBias(pattern_map[W],
+       auto cg = std::shared_ptr<Node>(new ngraph::op::MatmulBias(pattern_map[W],
            pattern_map[x],
            nullptr,
            shape_arg0,
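The remaining cpu_fusion.cpp hunks all follow the structure visible in construct_matmulbias above: describe a subgraph with pattern::op::Label nodes, hand the matcher a graph_rewrite_callback, and, on a match, build the fused op and splice it in with ngraph::replace_node. A compact sketch of that flow under simplified, hypothetical types (not nGraph's real classes):

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
    std::vector<std::shared_ptr<Node>> args;
};

struct Matcher
{
    std::shared_ptr<Node> root; // root of the matched subgraph
    std::shared_ptr<Node> get_match_root() const { return root; }
};

// In a real pass this rewires every user of `target` to point at `replacement`.
void replace_node(const std::shared_ptr<Node>& target, const std::shared_ptr<Node>& replacement)
{
    target->name = "(replaced by " + replacement->name + ")";
}

int main()
{
    using graph_rewrite_callback = std::function<bool(Matcher&)>;

    graph_rewrite_callback callback = [](Matcher& m) {
        // Build the fused op from the matched operands and splice it in.
        auto fused = std::make_shared<Node>();
        fused->name = "MatmulBias";
        fused->args = m.get_match_root()->args;
        replace_node(m.get_match_root(), fused);
        return true; // report that the graph changed
    };

    Matcher m;
    m.root = std::make_shared<Node>();
    m.root->name = "Add(Dot(W, x), Broadcast(b))";
    callback(m);
    std::cout << m.root->name << "\n";
}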
@@ -252,47 +252,51 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmul()
void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
{
    // construct varaiance
-   auto N = op::Constant::create(element::f32, Shape{3}, {2, 2, 2});
+   auto N = ngraph::op::Constant::create(element::f32, Shape{3}, {2, 2, 2});
    auto input = std::make_shared<pattern::op::Label>(element::f32, Shape{2, 3});
-   auto input_sq = std::make_shared<op::Multiply>(input, input);
-   auto sum_input = std::make_shared<op::Sum>(input, AxisSet{0});
-   auto square_sumed_input = std::make_shared<op::Multiply>(sum_input, sum_input);
-   auto sum_squared_input = std::make_shared<op::Sum>(input_sq, AxisSet{0});
-   auto avg_input_sum_sq = std::make_shared<op::Divide>(square_sumed_input, N);
-   auto xmu = std::make_shared<op::Subtract>(sum_squared_input, avg_input_sum_sq);
-   auto variance = std::make_shared<op::Divide>(xmu, N);
+   auto input_sq = std::make_shared<ngraph::op::Multiply>(input, input);
+   auto sum_input = std::make_shared<ngraph::op::Sum>(input, AxisSet{0});
+   auto square_sumed_input = std::make_shared<ngraph::op::Multiply>(sum_input, sum_input);
+   auto sum_squared_input = std::make_shared<ngraph::op::Sum>(input_sq, AxisSet{0});
+   auto avg_input_sum_sq = std::make_shared<ngraph::op::Divide>(square_sumed_input, N);
+   auto xmu = std::make_shared<ngraph::op::Subtract>(sum_squared_input, avg_input_sum_sq);
+   auto variance = std::make_shared<ngraph::op::Divide>(xmu, N);
    auto variance_label = std::make_shared<pattern::op::Label>(variance, nullptr, NodeVector{variance});
    auto variance_with_broadcast =
-       std::make_shared<op::Broadcast>(variance_label, Shape{2, 3}, AxisSet{0});
+       std::make_shared<ngraph::op::Broadcast>(variance_label, Shape{2, 3}, AxisSet{0});
    // construct mean
-   auto sum_input1 = std::make_shared<op::Sum>(input, AxisSet{0});
-   auto mean = std::make_shared<op::Divide>(sum_input1, N);
+   auto sum_input1 = std::make_shared<ngraph::op::Sum>(input, AxisSet{0});
+   auto mean = std::make_shared<ngraph::op::Divide>(sum_input1, N);
    auto mean_label = std::make_shared<pattern::op::Label>(mean, nullptr, NodeVector{mean});
-   auto mean_with_broadcast = std::make_shared<op::Broadcast>(mean_label, Shape{2, 3}, AxisSet{0});
-   auto input_diff_mean = std::make_shared<op::Subtract>(input, mean_with_broadcast);
+   auto mean_with_broadcast = std::make_shared<ngraph::op::Broadcast>(mean_label, Shape{2, 3}, AxisSet{0});
+   auto input_diff_mean = std::make_shared<ngraph::op::Subtract>(input, mean_with_broadcast);
    // Eps
    auto eps_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
-   auto eps_with_broadcast = std::make_shared<op::Broadcast>(eps_label, Shape{2, 3}, AxisSet{0});
-   auto add1 = std::make_shared<op::Add>(eps_with_broadcast, variance_with_broadcast);
-   auto sqrt_variance_eps = std::make_shared<op::Sqrt>(add1);
-   auto divide_mean_variance = std::make_shared<op::Divide>(input_diff_mean, sqrt_variance_eps);
+   auto eps_with_broadcast = std::make_shared<ngraph::op::Broadcast>(eps_label, Shape{2, 3}, AxisSet{0});
+   auto add1 = std::make_shared<ngraph::op::Add>(eps_with_broadcast, variance_with_broadcast);
+   auto sqrt_variance_eps = std::make_shared<ngraph::op::Sqrt>(add1);
+   auto divide_mean_variance = std::make_shared<ngraph::op::Divide>(input_diff_mean, sqrt_variance_eps);
    // Gamma
    auto gamma_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
    auto gamma_with_broadcast =
-       std::make_shared<op::Broadcast>(gamma_label, Shape{2, 3}, AxisSet{0});
+       std::make_shared<ngraph::op::Broadcast>(gamma_label, Shape{2, 3}, AxisSet{0});
    auto multiply_gamma =
-       std::make_shared<op::Multiply>(gamma_with_broadcast, divide_mean_variance);
+       std::make_shared<ngraph::op::Multiply>(gamma_with_broadcast, divide_mean_variance);
    // Beta
    auto beta_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
-   auto beta_with_broadcast = std::make_shared<op::Broadcast>(beta_label, Shape{2, 3}, AxisSet{0});
-   auto add_beta = std::make_shared<op::Add>(beta_with_broadcast, multiply_gamma);
+   auto beta_with_broadcast = std::make_shared<ngraph::op::Broadcast>(beta_label, Shape{2, 3}, AxisSet{0});
+   auto add_beta = std::make_shared<ngraph::op::Add>(beta_with_broadcast, multiply_gamma);
    // This completes fprop bn pattern
    // Define a call back that needs to called once the DFG matches the pattern

@@ -322,21 +326,22 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
        Shape m_bn_variance_shape{pattern_map[variance_label]->get_shape()};
        // get epsilon value
-       auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]);
+       auto eps_ptr = std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[eps_label]);
        if (!eps_ptr)
        {
            NGRAPH_DEBUG << "Eps must be a constant";
            return false;
        }
        double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr()));
-       auto bn_node = std::make_shared<op::BatchNormTraining>(
+       auto bn_node = std::make_shared<ngraph::op::BatchNormTraining>(
            epsilon, pattern_map[gamma_label], pattern_map[beta_label], pattern_map[input]);
        if (!mkldnn_utils::can_use_mkldnn_batchnorm_fprop(bn_node.get()))
        {
            return false;
        }
-       auto normalized_output = std::shared_ptr<Node>(new op::GetOutputElement(bn_node, 0));
+       auto normalized_output =
+           std::shared_ptr<Node>(new ngraph::op::GetOutputElement(bn_node, 0));
        ngraph::replace_node(m.get_match_root(), normalized_output);
        return true;
@@ -407,16 +412,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_reshaped_conv()
{
    auto pad_input = std::make_shared<pattern::op::Label>(element::f32, Shape{});
    auto pad_value = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-   auto pad = std::make_shared<op::Pad>(pad_input, pad_value, CoordinateDiff{}, CoordinateDiff{});
+   auto pad = std::make_shared<ngraph::op::Pad>(
+       pad_input, pad_value, CoordinateDiff{}, CoordinateDiff{});
    auto pad_label = std::make_shared<pattern::op::Label>(pad, nullptr, NodeVector{pad});
-   auto reshape = std::make_shared<op::Reshape>(pad_label, AxisVector{}, Shape{1, 1, 1, 1});
+   auto reshape =
+       std::make_shared<ngraph::op::Reshape>(pad_label, AxisVector{}, Shape{1, 1, 1, 1});
    auto reshape_label = std::make_shared<pattern::op::Label>(reshape, nullptr, NodeVector{reshape});
    auto conv_filter = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
-   auto conv = std::make_shared<op::Convolution>(reshape_label,
+   auto conv = std::make_shared<ngraph::op::Convolution>(reshape_label,
        conv_filter,
        Strides{1, 1},
        Strides{1, 1},

@@ -425,12 +432,15 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_reshaped_conv()
        Strides{1, 1});
    auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
-   ngraph::pattern::graph_rewrite_callback callback = [pad_input, pad_value, pad_label, reshape_label, conv_filter, conv_label](pattern::Matcher& m) {
+   ngraph::pattern::graph_rewrite_callback callback =
+       [pad_input, pad_value, pad_label, reshape_label, conv_filter, conv_label](
+           pattern::Matcher& m) {
        auto pattern_map = m.get_pattern_map();
-       auto pad_value_op = std::dynamic_pointer_cast<op::Constant>(pattern_map[pad_value]);
+       auto pad_value_op =
+           std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[pad_value]);
        if (!pad_value_op)
        {
            NGRAPH_DEBUG << "Pad value must be a constant";

@@ -438,16 +448,16 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_reshaped_conv()
        }
        const auto& matched_conv =
-           std::static_pointer_cast<op::Convolution>(pattern_map[conv_label]);
+           std::static_pointer_cast<ngraph::op::Convolution>(pattern_map[conv_label]);
-       const auto& matched_pad = std::static_pointer_cast<op::Pad>(pattern_map[pad_label]);
+       const auto& matched_pad = std::static_pointer_cast<ngraph::op::Pad>(pattern_map[pad_label]);
        const auto& matched_reshape =
-           std::static_pointer_cast<op::Reshape>(pattern_map[reshape_label]);
+           std::static_pointer_cast<ngraph::op::Reshape>(pattern_map[reshape_label]);
        const auto& input_order = matched_reshape->get_input_order();
        auto hoisted_reshape_output_shape =
            ngraph::apply_permutation<Shape>(pattern_map[pad_input]->get_shape(), input_order);
-       auto hoisted_reshape = std::make_shared<op::Reshape>(
+       auto hoisted_reshape = std::make_shared<ngraph::op::Reshape>(
            pattern_map[pad_input],
            input_order,
            Shape(hoisted_reshape_output_shape.begin(), hoisted_reshape_output_shape.end()));

@@ -474,7 +484,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_reshaped_conv()
            matched_pad->get_padding_above().at(input_order[3]))};
        auto zero_padded_conv =
-           std::make_shared<op::Convolution>(hoisted_reshape,
+           std::make_shared<ngraph::op::Convolution>(hoisted_reshape,
                pattern_map[conv_filter],
                matched_conv->get_window_movement_strides(),
                matched_conv->get_window_dilation_strides(),
@@ -494,13 +504,13 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
{
    auto pad_input = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
    auto pad_value = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-   auto pad = std::make_shared<op::Pad>(
+   auto pad = std::make_shared<ngraph::op::Pad>(
        pad_input, pad_value, CoordinateDiff{0, 0, 0, 0}, CoordinateDiff{0, 0, 0, 0});
    auto pad_label = std::make_shared<pattern::op::Label>(pad, nullptr, NodeVector{pad});
    auto conv_filter = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
-   auto conv = std::make_shared<op::Convolution>(pad_label,
+   auto conv = std::make_shared<ngraph::op::Convolution>(pad_label,
        conv_filter,
        Strides{1, 1},
        Strides{1, 1},

@@ -509,11 +519,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
        Strides{1, 1});
    auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
-   ngraph::pattern::graph_rewrite_callback callback = [pad_input, pad_value, pad_label, conv_filter, conv_label](pattern::Matcher& m) {
+   ngraph::pattern::graph_rewrite_callback callback =
+       [pad_input, pad_value, pad_label, conv_filter, conv_label](
+           pattern::Matcher& m) {
        auto pattern_map = m.get_pattern_map();
-       auto pad_value_op = std::dynamic_pointer_cast<op::Constant>(pattern_map[pad_value]);
+       auto pad_value_op =
+           std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[pad_value]);
        if (!pad_value_op)
        {
            NGRAPH_DEBUG << "Pad value must be a constant";

@@ -521,8 +534,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
        }
        const auto& matched_conv =
-           std::static_pointer_cast<op::Convolution>(pattern_map[conv_label]);
+           std::static_pointer_cast<ngraph::op::Convolution>(pattern_map[conv_label]);
-       const auto& matched_pad = std::static_pointer_cast<op::Pad>(pattern_map[pad_label]);
+       const auto& matched_pad = std::static_pointer_cast<ngraph::op::Pad>(pattern_map[pad_label]);
        if (!zero_padded_conv_consistency_check(m.get_match_root(),
            pad_value_op,

@@ -544,7 +557,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
            static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_above().at(3))};
        auto zero_padded_conv =
-           std::make_shared<op::Convolution>(pattern_map[pad_input],
+           std::make_shared<ngraph::op::Convolution>(pattern_map[pad_input],
                pattern_map[conv_filter],
                matched_conv->get_window_movement_strides(),
                matched_conv->get_window_dilation_strides(),
@@ -564,13 +577,13 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv_backprop_filters()
{
    auto pad_input = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
    auto pad_value = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-   auto pad = std::make_shared<op::Pad>(
+   auto pad = std::make_shared<ngraph::op::Pad>(
        pad_input, pad_value, CoordinateDiff{0, 0, 0, 0}, CoordinateDiff{0, 0, 0, 0});
    auto pad_label = std::make_shared<pattern::op::Label>(pad, nullptr, NodeVector{pad});
    auto output_delta = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
-   auto conv = std::make_shared<op::ConvolutionBackpropFilters>(pad_label,
+   auto conv = std::make_shared<ngraph::op::ConvolutionBackpropFilters>(pad_label,
        Shape{1, 1, 3, 3},
        output_delta,
        Strides{1, 1},

@@ -580,20 +593,23 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv_backprop_filters()
        Strides{1, 1});
    auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
-   ngraph::pattern::graph_rewrite_callback callback = [pad_input, pad_value, pad_label, output_delta, conv_label](pattern::Matcher& m) {
+   ngraph::pattern::graph_rewrite_callback callback =
+       [pad_input, pad_value, pad_label, output_delta, conv_label](
+           pattern::Matcher& m) {
        auto pattern_map = m.get_pattern_map();
-       auto pad_value_op = std::dynamic_pointer_cast<op::Constant>(pattern_map[pad_value]);
+       auto pad_value_op =
+           std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[pad_value]);
        if (!pad_value_op)
        {
            NGRAPH_DEBUG << "Pad value must be a constant";
            return false;
        }
-       const auto& matched_conv = std::static_pointer_cast<op::ConvolutionBackpropFilters>(pattern_map[conv_label]);
-       const auto& matched_pad = std::static_pointer_cast<op::Pad>(pattern_map[pad_label]);
+       const auto& matched_conv =
+           std::static_pointer_cast<ngraph::op::ConvolutionBackpropFilters>(pattern_map[conv_label]);
+       const auto& matched_pad = std::static_pointer_cast<ngraph::op::Pad>(pattern_map[pad_label]);
        if (!zero_padded_conv_consistency_check(m.get_match_root(),
            pad_value_op,

@@ -615,7 +631,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv_backprop_filters()
            static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_above().at(3))};
        auto zero_padded_conv_backprop_filters =
-           std::make_shared<op::ConvolutionBackpropFilters>(
+           std::make_shared<ngraph::op::ConvolutionBackpropFilters>(
                pattern_map[pad_input],
                matched_conv->get_filters_shape(),
                pattern_map[output_delta],
@@ -640,9 +656,9 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias()
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto pbias = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-   auto pbroadcast = std::make_shared<op::Broadcast>(pbias, shape, AxisSet{0, 1, 2, 3});
+   auto pbroadcast = std::make_shared<ngraph::op::Broadcast>(pbias, shape, AxisSet{0, 1, 2, 3});
-   auto pconv1 = std::make_shared<op::Convolution>(data_batch,
+   auto pconv1 = std::make_shared<ngraph::op::Convolution>(data_batch,
        filters,
        Strides{1, 1},
        Strides{1, 1},

@@ -656,9 +672,10 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias()
                     << m.get_match_root()->get_name();
        auto pattern_map = m.get_pattern_map();
-       auto conv = std::static_pointer_cast<op::Convolution>(m.get_match_root()->get_argument(0));
+       auto conv =
+           std::static_pointer_cast<ngraph::op::Convolution>(m.get_match_root()->get_argument(0));
-       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<op::Convolution>(conv.get()))
+       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<ngraph::op::Convolution>(conv.get()))
        {
            NGRAPH_DEBUG << "Convolution not supported by MKLDNN";
            return false;

@@ -671,14 +688,15 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias()
            NGRAPH_DEBUG << "mpattern = " << m.get_match_root()->get_name()
                         << "conv_bias bias shape != 1, requires reshape to match filter count.";
            auto order = ngraph::get_default_order(bias_shape);
-           auto bias_reshape = std::make_shared<op::Reshape>(bias, order, Shape{conv->get_input_shape(1)[0]});
-           auto conv_bias = std::shared_ptr<Node>(new op::ConvolutionBias(conv, bias_reshape));
+           auto bias_reshape = std::make_shared<ngraph::op::Reshape>(bias, order, Shape{conv->get_input_shape(1)[0]});
+           auto conv_bias =
+               std::shared_ptr<Node>(new ngraph::op::ConvolutionBias(conv, bias_reshape));
            ngraph::replace_node(m.get_match_root(), conv_bias);
        }
        else
        {
-           auto conv_bias = std::shared_ptr<Node>(new op::ConvolutionBias(conv, bias));
+           auto conv_bias = std::shared_ptr<Node>(new ngraph::op::ConvolutionBias(conv, bias));
            ngraph::replace_node(m.get_match_root(), conv_bias);
        }
        return true;
@@ -694,7 +712,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
    Shape shape{2, 2, 1, 1};
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto delta = std::make_shared<pattern::op::Label>(element::f32, shape);
-   auto conv_bprop_filter = std::make_shared<op::ConvolutionBackpropFilters>(data_batch,
+   auto conv_bprop_filter =
+       std::make_shared<ngraph::op::ConvolutionBackpropFilters>(data_batch,
        shape,
        delta,
        Strides{1, 1},

@@ -709,7 +728,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
        auto pattern_map = m.get_pattern_map();
        auto conv_bprop =
-           std::static_pointer_cast<op::ConvolutionBackpropFilters>(m.get_match_root());
+           std::static_pointer_cast<ngraph::op::ConvolutionBackpropFilters>(m.get_match_root());
        if (conv_bprop->get_input_shape(0).size() == 4 &&
            conv_bprop->get_input_shape(1).size() == 4 &&

@@ -717,9 +736,9 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
        {
            for (auto delta_user : pattern_map[delta]->get_users())
            {
-               if (std::dynamic_pointer_cast<op::Sum>(delta_user))
+               if (std::dynamic_pointer_cast<ngraph::op::Sum>(delta_user))
                {
-                   auto bias = std::dynamic_pointer_cast<op::Sum>(delta_user);
+                   auto bias = std::dynamic_pointer_cast<ngraph::op::Sum>(delta_user);
                    auto bias_shape = bias->get_shape();
                    bool flag = false;
                    if (bias_shape.size() > 1)

@@ -728,12 +747,13 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
                            << "mpattern = " << m.get_match_root()->get_name()
                            << "conv_bias bias shape != 1, requires reshape to match filter count.";
                        auto order = ngraph::get_default_order(bias_shape);
-                       auto bias_reshape = std::make_shared<op::Reshape>(
+                       auto bias_reshape = std::make_shared<ngraph::op::Reshape>(
                            bias, order, Shape{conv_bprop->get_filters_shape()[0]});
                        bias_shape = bias_reshape->get_shape();
                        flag = true;
                    }
-                   auto conv_bias_bprop = std::make_shared<op::ConvolutionBiasBackpropFiltersBias>(
+                   auto conv_bias_bprop =
+                       std::make_shared<ngraph::op::ConvolutionBiasBackpropFiltersBias>(
                        pattern_map[data_batch],
                        conv_bprop->get_filters_shape(),
                        bias_shape,

@@ -743,8 +763,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
                        conv_bprop->get_padding_below_forward(),
                        conv_bprop->get_padding_above_forward(),
                        conv_bprop->get_data_dilation_strides_forward());
-                   auto goe1 = std::make_shared<op::GetOutputElement>(conv_bias_bprop, 0);
-                   auto goe2 = std::make_shared<op::GetOutputElement>(conv_bias_bprop, 1);
+                   auto goe1 = std::make_shared<ngraph::op::GetOutputElement>(conv_bias_bprop, 0);
+                   auto goe2 = std::make_shared<ngraph::op::GetOutputElement>(conv_bias_bprop, 1);
                    NGRAPH_DEBUG << "Replacing " << m.get_match_root()->get_name()
                                 << "with ConvolutionBiasBackpropFiltersBias";
                    ngraph::replace_node(m.get_match_root(), goe1);

@@ -752,7 +772,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_bprop()
                        "ConvolutionBiasBackpropFiltersBias";
                    if (flag)
                    {
-                       auto goe2_reshape = std::make_shared<op::Reshape>(
+                       auto goe2_reshape = std::make_shared<ngraph::op::Reshape>(
                            goe2, AxisVector{0}, delta_user->get_shape());
                        ngraph::replace_node(delta_user, goe2_reshape);
                    }
@@ -783,16 +803,16 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_batch_norm_relu()
    auto beta_shape = Shape{2};
    auto beta = std::make_shared<pattern::op::Label>(element::f32, beta_shape);
    double eps = 0.001;
-   auto bn = std::make_shared<op::BatchNormTraining>(eps, gamma, beta, input);
-   auto goe = std::make_shared<op::GetOutputElement>(bn, 0);
-   auto prelu = std::make_shared<op::Relu>(goe);
+   auto bn = std::make_shared<ngraph::op::BatchNormTraining>(eps, gamma, beta, input);
+   auto goe = std::make_shared<ngraph::op::GetOutputElement>(bn, 0);
+   auto prelu = std::make_shared<ngraph::op::Relu>(goe);
    ngraph::pattern::graph_rewrite_callback callback = [input, gamma, beta](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In callback for construct_batch_norm_relu against node = "
                     << m.get_match_root()->get_name();
        auto pattern_map = m.get_pattern_map();
-       auto m_bn = std::static_pointer_cast<op::BatchNormTraining>(
+       auto m_bn = std::static_pointer_cast<ngraph::op::BatchNormTraining>(
            m.get_match_root()->get_argument(0)->get_inputs().at(0).get_output().get_node());
        if (!mkldnn_utils::can_use_mkldnn_batchnorm_fprop(m_bn.get()))

@@ -802,7 +822,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_batch_norm_relu()
        std::vector<std::shared_ptr<Node>> mgoes(m_bn->get_outputs().size());
        for (auto bn_in : m_bn->get_output_inputs(0))
        {
-           auto mgoe = std::dynamic_pointer_cast<op::GetOutputElement>(bn_in->get_node());
+           auto mgoe = std::dynamic_pointer_cast<ngraph::op::GetOutputElement>(bn_in->get_node());
            NGRAPH_ASSERT(mgoe);
            mgoes[mgoe->get_n()] = mgoe;
        }

@@ -815,12 +835,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_batch_norm_relu()
        mgoes[0] = m.get_match_root();
        // replace relu instead of its GetOutputElement
-       auto bn_relu = std::make_shared<op::BatchNormTrainingRelu>(
+       auto bn_relu = std::make_shared<ngraph::op::BatchNormTrainingRelu>(
            m_bn->get_eps_value(), pattern_map[gamma], pattern_map[beta], pattern_map[input]);
-       auto bn_relu_output = std::make_shared<op::GetOutputElement>(bn_relu, 0);
-       auto bn_relu_mean = std::make_shared<op::GetOutputElement>(bn_relu, 1);
-       auto bn_relu_var = std::make_shared<op::GetOutputElement>(bn_relu, 2);
+       auto bn_relu_output = std::make_shared<ngraph::op::GetOutputElement>(bn_relu, 0);
+       auto bn_relu_mean = std::make_shared<ngraph::op::GetOutputElement>(bn_relu, 1);
+       auto bn_relu_var = std::make_shared<ngraph::op::GetOutputElement>(bn_relu, 2);
        std::shared_ptr<Node> new_nodes[] = {bn_relu_output, bn_relu_mean, bn_relu_var};
@@ -848,12 +868,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_batch_norm_relu_global_stats()
    auto beta_shape = Shape{2};
    auto beta = std::make_shared<pattern::op::Label>(element::f32, beta_shape);
    auto bn_pred = [](std::shared_ptr<Node> node) {
-       return pattern::has_class<op::BatchNormInference>()(node) ||
-              pattern::has_class<op::BatchNormTraining>()(node);
+       return pattern::has_class<ngraph::op::BatchNormInference>()(node) ||
+              pattern::has_class<ngraph::op::BatchNormTraining>()(node);
    };
    auto bn = std::make_shared<pattern::op::Any>(input, bn_pred, NodeVector{gamma, beta, input, mean, var});
-   auto prelu = std::make_shared<op::Relu>(bn);
+   auto prelu = std::make_shared<ngraph::op::Relu>(bn);
    ngraph::pattern::graph_rewrite_callback callback = [input, mean, var, gamma, beta](pattern::Matcher& m) {

@@ -870,13 +890,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_batch_norm_relu_global_stats()
        }
        std::shared_ptr<Node> bn_relu;
-       if (auto bn_inference = std::dynamic_pointer_cast<op::BatchNormInference>(bn_match))
+       if (auto bn_inference =
+               std::dynamic_pointer_cast<ngraph::op::BatchNormInference>(bn_match))
        {
            if (!mkldnn_utils::can_use_mkldnn_batchnorm_fprop(bn_inference.get()))
            {
                return false;
            }
-           bn_relu = std::make_shared<op::BatchNormInferenceRelu>(bn_inference->get_eps_value(),
+           bn_relu = std::make_shared<ngraph::op::BatchNormInferenceRelu>(bn_inference->get_eps_value(),
                pattern_map[gamma],
                pattern_map[beta],
                pattern_map[input],
@@ -904,7 +925,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_relu()
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
-   auto pconv = std::make_shared<op::Convolution>(data_batch,
+   auto pconv = std::make_shared<ngraph::op::Convolution>(data_batch,
        filters,
        Strides{1, 1},
        Strides{1, 1},

@@ -912,15 +933,16 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_relu()
        CoordinateDiff{0, 0},
        Strides{1, 1});
-   auto prelu = std::make_shared<op::Relu>(pconv);
+   auto prelu = std::make_shared<ngraph::op::Relu>(pconv);
    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_relu against "
                     << m.get_match_root()->get_name();
-       auto conv = std::static_pointer_cast<op::Convolution>(m.get_match_root()->get_argument(0));
+       auto conv =
+           std::static_pointer_cast<ngraph::op::Convolution>(m.get_match_root()->get_argument(0));
-       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<op::Convolution>(conv.get()))
+       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<ngraph::op::Convolution>(conv.get()))
        {
            NGRAPH_DEBUG << "Convolution not supported by MKLDNN";
            return false;

@@ -932,7 +954,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_relu()
            return false;
        }
-       auto conv_relu = std::shared_ptr<Node>(new op::ConvolutionRelu(conv));
+       auto conv_relu = std::shared_ptr<Node>(new ngraph::op::ConvolutionRelu(conv));
        ngraph::replace_node(m.get_match_root(), conv_relu);
        return true;
    };
@@ -948,7 +970,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu()
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{shape[0]});
-   auto conv_bias = std::make_shared<op::ConvolutionBias>(data_batch,
+   auto conv_bias = std::make_shared<ngraph::op::ConvolutionBias>(data_batch,
        filters,
        bias,
        Strides{1, 1},

@@ -957,14 +979,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu()
        CoordinateDiff{0, 0},
        Strides{1, 1});
-   auto prelu = std::make_shared<op::Relu>(conv_bias);
+   auto prelu = std::make_shared<ngraph::op::Relu>(conv_bias);
    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_relu against "
                     << m.get_match_root()->get_name();
-       auto conv = std::static_pointer_cast<op::ConvolutionBias>(m.get_match_root()->get_argument(0));
+       auto conv = std::static_pointer_cast<ngraph::op::ConvolutionBias>(m.get_match_root()->get_argument(0));
        if (conv->get_users().size() > 1)
        {

@@ -974,7 +996,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu()
        // ConvolutionBias created only if it can run with MKLDNN.
        // No further checks needed.
-       auto conv_relu = std::make_shared<op::ConvolutionBias>(conv->get_argument(0),
+       auto conv_relu =
+           std::make_shared<ngraph::op::ConvolutionBias>(conv->get_argument(0),
            conv->get_argument(1),
            conv->get_argument(2),
            conv->get_window_movement_strides(),
@@ -997,7 +1020,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add()
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
-   auto pconv = std::make_shared<op::Convolution>(data_batch,
+   auto pconv = std::make_shared<ngraph::op::Convolution>(data_batch,
        filters,
        Strides{1, 1},
        Strides{1, 1},

@@ -1005,7 +1028,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add()
        CoordinateDiff{0, 0},
        Strides{1, 1});
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, pconv->get_shape());
-   auto padd = std::make_shared<op::Add>(add_input, pconv);
+   auto padd = std::make_shared<ngraph::op::Add>(add_input, pconv);
    pattern::graph_rewrite_callback callback = [data_batch, filters](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_add against "

@@ -1013,16 +1036,16 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add()
        auto add_m = m.get_match_root();
        auto pattern_map = m.get_pattern_map();
-       auto conv_m = std::dynamic_pointer_cast<op::Convolution>(add_m->get_argument(1));
+       auto conv_m = std::dynamic_pointer_cast<ngraph::op::Convolution>(add_m->get_argument(1));
        auto inplace_input = add_m->get_argument(0);
        if (!conv_m)
        {
-           conv_m = std::dynamic_pointer_cast<op::Convolution>(add_m->get_argument(0));
+           conv_m = std::dynamic_pointer_cast<ngraph::op::Convolution>(add_m->get_argument(0));
            inplace_input = add_m->get_argument(1);
        }
-       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<op::Convolution>(conv_m.get()))
+       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<ngraph::op::Convolution>(conv_m.get()))
        {
            NGRAPH_DEBUG << "Convolution not supported by MKLDNN";
            return false;

@@ -1048,7 +1071,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add()
            return false;
        }
-       auto conv_add = std::shared_ptr<Node>(new op::ConvolutionAdd(conv_m, inplace_input, false));
+       auto conv_add =
+           std::shared_ptr<Node>(new ngraph::op::ConvolutionAdd(conv_m, inplace_input, false));
        ngraph::replace_node(m.get_match_root(), conv_add);
        return true;
    };
@@ -1064,7 +1088,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add_relu()
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, shape);
-   auto pconv = std::make_shared<op::ConvolutionAdd>(data_batch,
+   auto pconv = std::make_shared<ngraph::op::ConvolutionAdd>(data_batch,
        filters,
        add_input,
        Strides{1, 1},

@@ -1073,14 +1097,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add_relu()
        CoordinateDiff{0, 0},
        Strides{1, 1},
        false);
-   auto prelu = std::make_shared<op::Relu>(pconv);
+   auto prelu = std::make_shared<ngraph::op::Relu>(pconv);
    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_add_relu against "
                     << m.get_match_root()->get_name();
-       auto conv_m = std::static_pointer_cast<op::ConvolutionAdd>(m.get_match_root()->get_argument(0));
+       auto conv_m = std::static_pointer_cast<ngraph::op::ConvolutionAdd>(m.get_match_root()->get_argument(0));
        if (conv_m->get_users().size() > 1)
        {
            NGRAPH_DEBUG << "Convolution has more than one user";

@@ -1089,7 +1113,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add_relu()
        // ConvolutionAdd created only if it can run with MKLDNN.
        // No further checks needed.
-       auto conv_n = std::make_shared<op::ConvolutionAdd>(conv_m->get_argument(0),
+       auto conv_n =
+           std::make_shared<ngraph::op::ConvolutionAdd>(conv_m->get_argument(0),
            conv_m->get_argument(1),
            conv_m->get_argument(2),
            conv_m->get_window_movement_strides(),
@@ -1113,7 +1138,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{shape[0]});
-   auto pconv = std::make_shared<op::ConvolutionBias>(data_batch,
+   auto pconv = std::make_shared<ngraph::op::ConvolutionBias>(data_batch,
        filters,
        bias,
        Strides{1, 1},

@@ -1122,7 +1147,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
        CoordinateDiff{0, 0},
        Strides{1, 1});
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, pconv->get_shape());
-   auto padd = std::make_shared<op::Add>(add_input, pconv);
+   auto padd = std::make_shared<ngraph::op::Add>(add_input, pconv);
    pattern::graph_rewrite_callback callback = [data_batch, filters](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_sum against "

@@ -1130,16 +1155,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
        auto add_m = m.get_match_root();
        auto pattern_map = m.get_pattern_map();
-       auto conv_m = std::dynamic_pointer_cast<op::ConvolutionBias>(add_m->get_argument(1));
+       auto conv_m =
+           std::dynamic_pointer_cast<ngraph::op::ConvolutionBias>(add_m->get_argument(1));
        auto inplace_input = add_m->get_argument(0);
        if (!conv_m)
        {
-           conv_m = std::dynamic_pointer_cast<op::ConvolutionBias>(add_m->get_argument(0));
+           conv_m =
+               std::dynamic_pointer_cast<ngraph::op::ConvolutionBias>(add_m->get_argument(0));
            inplace_input = add_m->get_argument(1);
        }
-       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<op::ConvolutionBias>(conv_m.get()))
+       if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<ngraph::op::ConvolutionBias>(conv_m.get()))
        {
            NGRAPH_DEBUG << "Convolution not supported by MKLDNN";
            return false;

@@ -1166,7 +1193,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
        }
        auto conv_add =
-           std::shared_ptr<Node>(new op::ConvolutionBiasAdd(conv_m, inplace_input, false));
+           std::shared_ptr<Node>(new ngraph::op::ConvolutionBiasAdd(conv_m, inplace_input, false));
        ngraph::replace_node(m.get_match_root(), conv_add);
        return true;
    };
@@ -1183,7 +1210,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add_relu()
    auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{shape[0]});
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, shape);
-   auto pconv = std::make_shared<op::ConvolutionBiasAdd>(data_batch,
+   auto pconv = std::make_shared<ngraph::op::ConvolutionBiasAdd>(data_batch,
        filters,
        bias,
        add_input,

@@ -1193,14 +1220,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add_relu()
        CoordinateDiff{0, 0},
        Strides{1, 1},
        false);
-   auto prelu = std::make_shared<op::Relu>(pconv);
+   auto prelu = std::make_shared<ngraph::op::Relu>(pconv);
    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_sum against "
                     << m.get_match_root()->get_name();
-       auto conv_m = std::static_pointer_cast<op::ConvolutionBiasAdd>(m.get_match_root()->get_argument(0));
+       auto conv_m = std::static_pointer_cast<ngraph::op::ConvolutionBiasAdd>(m.get_match_root()->get_argument(0));
        if (conv_m->get_users().size() > 1)
        {
            NGRAPH_DEBUG << "Convolution has more than one user";

@@ -1220,7 +1247,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add_relu()
        // ConvolutionBiasAdd created only if it can run with MKLDNN.
        // No further checks needed.
        auto conv_n =
-           std::make_shared<op::ConvolutionBiasAdd>(conv_m->get_argument(0),
+           std::make_shared<ngraph::op::ConvolutionBiasAdd>(conv_m->get_argument(0),
                conv_m->get_argument(1),
                conv_m->get_argument(2),
                conv_m->get_argument(3),
@@ -1242,19 +1269,19 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid_multiply()
{
// Construct predicate to match sigmoid and tanh
auto
sigmoid_pred
=
[](
std
::
shared_ptr
<
Node
>
n
)
{
return
(
std
::
dynamic_pointer_cast
<
op
::
Sigmoid
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
op
::
Tanh
>
(
n
)
!=
nullptr
);
return
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Sigmoid
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Tanh
>
(
n
)
!=
nullptr
);
};
// Construct predicate to match other valid nodes
auto
other_pred
=
[](
std
::
shared_ptr
<
Node
>
n
)
{
return
(
std
::
dynamic_pointer_cast
<
op
::
Sigmoid
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
op
::
Tanh
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
op
::
Add
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
op
::
Broadcast
>
(
n
)
!=
nullptr
);
return
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Sigmoid
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Tanh
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Add
>
(
n
)
!=
nullptr
)
||
(
std
::
dynamic_pointer_cast
<
ngraph
::
op
::
Broadcast
>
(
n
)
!=
nullptr
);
};
auto
sigmoid_0
=
std
::
make_shared
<
pattern
::
op
::
Label
>
(
element
::
f32
,
Shape
{
1
,
1
},
sigmoid_pred
);
auto
sigmoid_1
=
std
::
make_shared
<
pattern
::
op
::
Label
>
(
element
::
f32
,
Shape
{
1
,
1
},
other_pred
);
auto
elem_mul
=
std
::
make_shared
<
op
::
Multiply
>
(
sigmoid_0
,
sigmoid_1
);
auto
elem_mul
=
std
::
make_shared
<
ngraph
::
op
::
Multiply
>
(
sigmoid_0
,
sigmoid_1
);
ngraph
::
pattern
::
graph_rewrite_callback
callback
=
[
sigmoid_0
,
sigmoid_1
](
pattern
::
Matcher
&
m
)
{
NGRAPH_DEBUG
<<
"In a callback for construct_sigmoid_multiply pattern against "
...
...
@@ -1268,7 +1295,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid_multiply()
return
false
;
}
using
FunctionType
=
op
::
SigmoidMultiply
::
FunctionType
;
using
FunctionType
=
ngraph
::
op
::
SigmoidMultiply
::
FunctionType
;
const
int
max_inputs
{
2
};
std
::
array
<
std
::
shared_ptr
<
ngraph
::
Node
>
,
max_inputs
>
match_nodes
{
{
pattern_map
[
sigmoid_0
],
pattern_map
[
sigmoid_1
]}};
...
...
@@ -1276,7 +1303,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid_multiply()
         std::array<FunctionType, max_inputs> input_type;
         for (int i = 0; i < max_inputs; ++i)
         {
-            input_type[i] = op::SigmoidMultiply::identify_node_type(match_nodes[i]);
+            input_type[i] = ngraph::op::SigmoidMultiply::identify_node_type(match_nodes[i]);
             if (input_type[i] != FunctionType::Identity)
             {
                 if (match_nodes[i]->get_users().size() > 1)
...
...
@@ -1291,7 +1318,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid_multiply()
                 input_nodes[i] = match_nodes[i];
             }
         }
-        auto sigmoid_mul_node = std::make_shared<op::SigmoidMultiply>(
+        auto sigmoid_mul_node = std::make_shared<ngraph::op::SigmoidMultiply>(
             input_nodes[0], input_nodes[1], input_type[0], input_type[1]);
         ngraph::replace_node(m.get_match_root(), sigmoid_mul_node);
         return true;
...
...
@@ -1304,21 +1331,21 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid_multiply()
 void ngraph::runtime::cpu::pass::CPUFusion::construct_leaky_relu()
 {
     auto input = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-    auto iconst1 = op::Constant::create(element::f32, Shape{}, {1});
+    auto iconst1 = ngraph::op::Constant::create(element::f32, Shape{}, {1});
     auto alpha = std::make_shared<pattern::op::Label>(iconst1);
     auto broadcast_pred = [](std::shared_ptr<Node> n) {
-        return (std::dynamic_pointer_cast<op::Broadcast>(n) != nullptr);
+        return (std::dynamic_pointer_cast<ngraph::op::Broadcast>(n) != nullptr);
     };
     auto skip_broadcast = std::make_shared<pattern::op::Skip>(alpha, broadcast_pred);
-    auto leaky_relu = std::make_shared<op::Maximum>(
-        input, std::make_shared<op::Multiply>(input, skip_broadcast));
+    auto leaky_relu = std::make_shared<ngraph::op::Maximum>(
+        input, std::make_shared<ngraph::op::Multiply>(input, skip_broadcast));
     pattern::graph_rewrite_callback callback = [input, alpha](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_leaky_relu against "
                      << m.get_match_root()->get_name();
         auto pattern_map = m.get_pattern_map();
-        if (!std::dynamic_pointer_cast<op::Constant>(pattern_map[alpha]))
+        if (!std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[alpha]))
         {
             NGRAPH_DEBUG << "alpha must be constant for leaky relu";
             return false;
...
...
@@ -1330,7 +1357,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_leaky_relu()
             return false;
         }
-        auto alpha_const_op = std::static_pointer_cast<op::Constant>(pattern_map[alpha]);
+        auto alpha_const_op = std::static_pointer_cast<ngraph::op::Constant>(pattern_map[alpha]);
         auto alpha_vec = alpha_const_op->get_vector<float>();
         for (auto val : alpha_vec)
         {
...
...
@@ -1350,7 +1377,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_leaky_relu()
             return false;
         }
-        auto cg = std::shared_ptr<Node>(new op::LeakyRelu(pattern_map[input], alpha_vec[0]));
+        auto cg =
+            std::shared_ptr<Node>(new ngraph::op::LeakyRelu(pattern_map[input], alpha_vec[0]));
         ngraph::replace_node(m.get_match_root(), cg);
         return true;
     };
...
...
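For reference, the subgraph this pass collapses computes the standard leaky-ReLU function; in LaTeX (textbook definition, not text from the diff):

\[ \mathrm{LeakyRelu}(x) \;=\; \max(x,\ \alpha x), \]

matched here as Maximum(input, Multiply(input, broadcast(alpha))). The callback only fires when alpha is a compile-time Constant, since the fused ngraph::op::LeakyRelu is constructed with the scalar alpha_vec[0] as an attribute rather than a graph input.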
@@ -1361,14 +1389,14 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_leaky_relu()
 void ngraph::runtime::cpu::pass::CPUFusion::construct_bounded_relu()
 {
     auto relu_input = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-    auto relu = std::make_shared<op::Relu>(relu_input);
-    auto iconst1 = op::Constant::create(element::f32, Shape{}, {1});
+    auto relu = std::make_shared<ngraph::op::Relu>(relu_input);
+    auto iconst1 = ngraph::op::Constant::create(element::f32, Shape{}, {1});
     auto alpha = std::make_shared<pattern::op::Label>(iconst1);
     auto broadcast_pred = [](std::shared_ptr<Node> n) {
-        return (std::dynamic_pointer_cast<op::Broadcast>(n) != nullptr);
+        return (std::dynamic_pointer_cast<ngraph::op::Broadcast>(n) != nullptr);
     };
     auto skip_broadcast = std::make_shared<pattern::op::Skip>(alpha, broadcast_pred);
-    auto min = std::make_shared<op::Minimum>(relu, skip_broadcast);
+    auto min = std::make_shared<ngraph::op::Minimum>(relu, skip_broadcast);
     pattern::graph_rewrite_callback callback = [relu_input, alpha](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_bounded_relu against "
...
...
@@ -1381,7 +1409,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_bounded_relu()
             return false;
         }
         auto pattern_map = m.get_pattern_map();
-        if (!std::dynamic_pointer_cast<op::Constant>(pattern_map[alpha]))
+        if (!std::dynamic_pointer_cast<ngraph::op::Constant>(pattern_map[alpha]))
         {
             NGRAPH_DEBUG << "alpha must be constant for bounded relu";
             return false;
...
...
@@ -1397,12 +1425,13 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_bounded_relu()
             return false;
         }
-        auto alpha_const_op = std::static_pointer_cast<op::Constant>(pattern_map[alpha]);
+        auto alpha_const_op =
+            std::static_pointer_cast<ngraph::op::Constant>(pattern_map[alpha]);
         float alpha_val = *(static_cast<float const*>(alpha_const_op->get_data_ptr()));
         NGRAPH_DEBUG << "relu_input: " << pattern_map[relu_input] << " min_val: "
                      << *(static_cast<float const*>(alpha_const_op->get_data_ptr()));
-        auto cg = std::shared_ptr<Node>(new op::BoundedRelu(pattern_map[relu_input], alpha_val));
+        auto cg =
+            std::shared_ptr<Node>(new ngraph::op::BoundedRelu(pattern_map[relu_input], alpha_val));
         ngraph::replace_node(m.get_match_root(), cg);
         return true;
     };
...
...
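Similarly, the bounded-ReLU pattern above matches Minimum(Relu(x), broadcast(alpha)); in LaTeX (standard definition, restating what the graph computes):

\[ \mathrm{BoundedRelu}(x) \;=\; \min(\max(x,\ 0),\ \alpha), \]

with alpha again required to be a Constant so it can be baked into the fused op as the scalar alpha_val.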
@@ -1417,7 +1446,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_folded_batch_nor
     auto filters = std::make_shared<pattern::op::Label>(element::f32, Shape{2, 2, 1, 1});
     auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{2});
-    auto pconv = std::make_shared<op::ConvolutionBias>(input,
+    auto pconv = std::make_shared<ngraph::op::ConvolutionBias>(input,
                                                        filters,
                                                        bias,
                                                        Strides{1, 1},
...
...
@@ -1431,7 +1460,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_folded_batch_nor
     auto gamma = std::make_shared<pattern::op::Label>(element::f32, Shape{2});
     auto beta = std::make_shared<pattern::op::Label>(element::f32, Shape{2});
     double eps = 0.001;
-    auto bn = std::make_shared<op::BatchNormInference>(eps, gamma, beta, pconv, mean, var);
+    auto bn = std::make_shared<ngraph::op::BatchNormInference>(eps, gamma, beta, pconv, mean, var);
     ngraph::pattern::graph_rewrite_callback callback =
         [input, filters, bias, mean, var, gamma, beta](pattern::Matcher& m) {
...
...
@@ -1439,8 +1468,10 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_folded_batch_nor
                          << m.get_match_root()->get_name();
             auto pattern_map = m.get_pattern_map();
-            auto m_bn = std::static_pointer_cast<op::BatchNormInference>(m.get_match_root());
-            auto m_conv = std::static_pointer_cast<op::ConvolutionBias>(m_bn->get_argument(2));
+            auto m_bn =
+                std::static_pointer_cast<ngraph::op::BatchNormInference>(m.get_match_root());
+            auto m_conv =
+                std::static_pointer_cast<ngraph::op::ConvolutionBias>(m_bn->get_argument(2));
             if (m_conv->get_users().size() > 1)
             {
...
...
@@ -1455,25 +1486,28 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_folded_batch_nor
             // new weights = old weights * gamma / sqrt(variance + epsilon)
             // new biases = (old_bias-mean) * gamma / sqrt(variance + epsilon) + beta
-            auto bn_eps = op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
-            auto var_eps = std::make_shared<op::Add>(
+            auto bn_eps =
+                ngraph::op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
+            auto var_eps = std::make_shared<ngraph::op::Add>(
                 pattern_map[var],
-                std::make_shared<op::Broadcast>(bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
-            auto sqrt_var_eps = std::make_shared<op::Sqrt>(var_eps);
+                std::make_shared<ngraph::op::Broadcast>(
+                    bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
+            auto sqrt_var_eps = std::make_shared<ngraph::op::Sqrt>(var_eps);
-            auto mean_gamma = std::make_shared<op::Multiply>(
-                std::make_shared<op::Subtract>(pattern_map[bias], pattern_map[mean]),
+            auto mean_gamma = std::make_shared<ngraph::op::Multiply>(
+                std::make_shared<ngraph::op::Subtract>(pattern_map[bias], pattern_map[mean]),
                 pattern_map[gamma]);
-            auto new_biases = std::make_shared<op::Add>(
-                pattern_map[beta], std::make_shared<op::Divide>(mean_gamma, sqrt_var_eps));
-            auto weight_scaling = std::make_shared<op::Divide>(pattern_map[gamma], sqrt_var_eps);
-            auto new_weights = std::make_shared<op::Multiply>(
+            auto new_biases = std::make_shared<ngraph::op::Add>(
+                pattern_map[beta], std::make_shared<ngraph::op::Divide>(mean_gamma, sqrt_var_eps));
+            auto weight_scaling =
+                std::make_shared<ngraph::op::Divide>(pattern_map[gamma], sqrt_var_eps);
+            auto new_weights = std::make_shared<ngraph::op::Multiply>(
                 pattern_map[filters],
-                std::make_shared<op::Broadcast>(
+                std::make_shared<ngraph::op::Broadcast>(
                     weight_scaling, pattern_map[filters]->get_shape(), AxisSet{1, 2, 3}));
-            auto conv_bias = std::make_shared<op::ConvolutionBias>(pattern_map[input],
+            auto conv_bias =
+                std::make_shared<ngraph::op::ConvolutionBias>(pattern_map[input],
                                                                new_weights,
                                                                new_biases,
                                                                m_conv->get_window_movement_strides(),
...
...
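The two comment lines in this hunk summarize the algebra; a short derivation (standard batch-norm folding, consistent with those comments) of why the rewrite is exact at inference time:

\[
\mathrm{BN}(y) = \gamma\,\frac{y - \mu}{\sqrt{\sigma^2 + \varepsilon}} + \beta,
\qquad y = \mathrm{conv}(x, W) + b,
\]
and since convolution is linear in its weights,
\[
\mathrm{BN}\big(\mathrm{conv}(x, W) + b\big)
 = \mathrm{conv}\!\left(x,\ W \cdot \frac{\gamma}{\sqrt{\sigma^2+\varepsilon}}\right)
 + \underbrace{(b - \mu)\,\frac{\gamma}{\sqrt{\sigma^2+\varepsilon}} + \beta}_{\text{new bias}}.
\]

This is exactly the graph built above: sqrt_var_eps is \(\sqrt{\sigma^2+\varepsilon}\), weight_scaling is \(\gamma/\sqrt{\sigma^2+\varepsilon}\) broadcast over the non-output-channel axes, and new_biases is the underbraced term.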
@@ -1500,7 +1534,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
     auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
     auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{2});
-    auto conv = std::make_shared<op::ConvolutionBias>(input,
+    auto conv = std::make_shared<ngraph::op::ConvolutionBias>(input,
                                                       filters,
                                                       bias,
                                                       Strides{1, 1},
...
...
@@ -1511,9 +1545,9 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
     auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
     auto Ac = std::make_shared<pattern::op::Label>(element::f32, Shape{2});
-    auto A = std::make_shared<op::Broadcast>(Ac, Shape{2, 2, 1, 1}, AxisSet{0, 2, 3});
+    auto A = std::make_shared<ngraph::op::Broadcast>(Ac, Shape{2, 2, 1, 1}, AxisSet{0, 2, 3});
     auto A_label = std::make_shared<pattern::op::Label>(A, nullptr, NodeVector{A});
-    auto multiply = std::make_shared<op::Multiply>(conv_label, A_label);
+    auto multiply = std::make_shared<ngraph::op::Multiply>(conv_label, A_label);
     ngraph::pattern::graph_rewrite_callback callback =
         [input, filters, bias, conv_label, A_label](pattern::Matcher& m) {
...
...
@@ -1521,7 +1555,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
                          << m.get_match_root()->get_name();
             auto pattern_map = m.get_pattern_map();
-            auto conv_m = std::static_pointer_cast<op::ConvolutionBias>(pattern_map[conv_label]);
+            auto conv_m =
+                std::static_pointer_cast<ngraph::op::ConvolutionBias>(pattern_map[conv_label]);
             if (conv_m->get_users().size() > 1)
             {
...
...
@@ -1538,10 +1573,10 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
                 return false;
             }
-            auto A_m = std::static_pointer_cast<op::Broadcast>(pattern_map[A_label]);
+            auto A_m = std::static_pointer_cast<ngraph::op::Broadcast>(pattern_map[A_label]);
             // Check if values are being broadcast along channel (2nd) dimension
-            auto is_channel_bcast = [](const std::shared_ptr<op::Broadcast>& bcast) {
+            auto is_channel_bcast = [](const std::shared_ptr<ngraph::op::Broadcast>& bcast) {
                 auto input_shape = bcast->get_argument(0)->get_shape();
                 if (input_shape.size() == 0 || shape_size(input_shape) == 1)
                 {
...
...
@@ -1566,19 +1601,21 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
                 return false;
             }
-            auto get_bcast_input = [](const std::shared_ptr<op::Broadcast>& bcast) {
+            auto get_bcast_input = [](const std::shared_ptr<ngraph::op::Broadcast>& bcast) {
                 auto input_shape = bcast->get_argument(0)->get_shape();
                 if (input_shape.size() == 0)
                 {
                     Shape bshape{bcast->get_shape()[1]};
                     return std::static_pointer_cast<ngraph::Node>(
-                        std::make_shared<op::Broadcast>(bcast->get_argument(0), bshape, AxisSet{0}));
+                        std::make_shared<ngraph::op::Broadcast>(
+                            bcast->get_argument(0), bshape, AxisSet{0}));
                 }
                 if (shape_size(input_shape) == 1)
                 {
                     Shape bshape{bcast->get_shape()[1]};
                     return std::static_pointer_cast<ngraph::Node>(
-                        std::make_shared<op::Broadcast>(std::make_shared<op::Reshape>(
+                        std::make_shared<ngraph::op::Broadcast>(
+                            std::make_shared<ngraph::op::Reshape>(
                                 bcast->get_argument(0), get_default_order(input_shape), Shape{}),
                             bshape,
                             AxisSet{0}));
...
...
@@ -1590,7 +1627,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
                 if (input_shape.size() == 2)
                 {
                     Shape bshape{input_shape[1]};
-                    return std::static_pointer_cast<ngraph::Node>(std::make_shared<op::Reshape>(
+                    return std::static_pointer_cast<ngraph::Node>(std::make_shared<ngraph::op::Reshape>(
                         bcast->get_argument(0), AxisVector{0, 1}, bshape));
                 }
                 throw ngraph_error("Unexpected shape for bcast input");
...
...
@@ -1601,15 +1638,15 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
             // new weights = old weights * Ac_m
             // new_bias = old_bias * Ac_m;
-            auto filters_n = std::make_shared<op::Multiply>(
+            auto filters_n = std::make_shared<ngraph::op::Multiply>(
                 pattern_map[filters],
-                std::make_shared<op::Broadcast>(
+                std::make_shared<ngraph::op::Broadcast>(
                     Ac_m, pattern_map[filters]->get_shape(), AxisSet{1, 2, 3}));
-            auto bias_n = std::make_shared<op::Multiply>(pattern_map[bias], Ac_m);
-            auto convbias_n = std::make_shared<op::ConvolutionBias>(pattern_map[input],
+            auto bias_n = std::make_shared<ngraph::op::Multiply>(pattern_map[bias], Ac_m);
+            auto convbias_n =
+                std::make_shared<ngraph::op::ConvolutionBias>(pattern_map[input],
                                                                filters_n,
                                                                bias_n,
                                                                conv_m->get_window_movement_strides(),
...
...
@@ -1638,7 +1675,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_groupconv_batchnorm_global
     auto filters = std::make_shared<pattern::op::Label>(element::f32, shape_b);
     auto resShape = std::make_shared<pattern::op::Label>(element::f32, shape_r);
-    auto conv = std::make_shared<op::GroupConvolution>(input,
+    auto conv = std::make_shared<ngraph::op::GroupConvolution>(input,
                                                        filters,
                                                        Strides{1, 1},
                                                        Strides{1, 1},
...
...
@@ -1654,7 +1691,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_groupconv_batchnorm_global
     auto gamma = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
     auto beta = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
     double eps = 0.001;
-    auto bn = std::make_shared<op::BatchNormInference>(eps, gamma, beta, conv_label, mean, var);
+    auto bn =
+        std::make_shared<ngraph::op::BatchNormInference>(eps, gamma, beta, conv_label, mean, var);
     ngraph::pattern::graph_rewrite_callback callback =
         [input, filters, conv_label, mean, var, gamma, beta, eps](pattern::Matcher& m) {
...
...
@@ -1663,8 +1701,10 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_groupconv_batchnorm_global
                          << m.get_match_root()->get_name();
             auto pattern_map = m.get_pattern_map();
-            auto m_bn = std::static_pointer_cast<op::BatchNormInference>(m.get_match_root());
-            auto conv_m = std::static_pointer_cast<op::GroupConvolution>(pattern_map[conv_label]);
+            auto m_bn =
+                std::static_pointer_cast<ngraph::op::BatchNormInference>(m.get_match_root());
+            auto conv_m =
+                std::static_pointer_cast<ngraph::op::GroupConvolution>(pattern_map[conv_label]);
             if (conv_m->get_users().size() > 1)
             {
...
...
@@ -1684,25 +1724,29 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_groupconv_batchnorm_global
             // new weights = old weights * gamma / sqrt(variance + epsilon)
             // new biases = (-mean) * gamma / sqrt(variance + epsilon) + beta
-            auto bn_eps = op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
+            auto bn_eps =
+                ngraph::op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
-            auto var_eps = std::make_shared<op::Add>(
+            auto var_eps = std::make_shared<ngraph::op::Add>(
                 pattern_map[var],
-                std::make_shared<op::Broadcast>(bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
-            auto sqrt_var_eps = std::make_shared<op::Sqrt>(var_eps);
+                std::make_shared<ngraph::op::Broadcast>(
+                    bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
+            auto sqrt_var_eps = std::make_shared<ngraph::op::Sqrt>(var_eps);
-            auto weight_scaling = std::make_shared<op::Divide>(pattern_map[gamma], sqrt_var_eps);
+            auto weight_scaling =
+                std::make_shared<ngraph::op::Divide>(pattern_map[gamma], sqrt_var_eps);
-            auto weight_scaling_bcast = std::make_shared<op::Broadcast>(
+            auto weight_scaling_bcast = std::make_shared<ngraph::op::Broadcast>(
                 weight_scaling, pattern_map[filters]->get_shape(), AxisSet{1, 2, 3});
-            auto new_weights = std::make_shared<op::Multiply>(pattern_map[filters], weight_scaling_bcast);
-            auto mean_gamma = std::make_shared<op::Multiply>(pattern_map[mean], weight_scaling);
-            auto new_biases = std::make_shared<op::Subtract>(pattern_map[beta], mean_gamma);
+            auto new_weights =
+                std::make_shared<ngraph::op::Multiply>(pattern_map[filters], weight_scaling_bcast);
+            auto mean_gamma =
+                std::make_shared<ngraph::op::Multiply>(pattern_map[mean], weight_scaling);
+            auto new_biases =
+                std::make_shared<ngraph::op::Subtract>(pattern_map[beta], mean_gamma);
-            auto g_conv_bias = std::make_shared<op::GroupConvolutionBias>(pattern_map[input],
+            auto g_conv_bias =
+                std::make_shared<ngraph::op::GroupConvolutionBias>(pattern_map[input],
                                                                    new_weights,
                                                                    new_biases,
                                                                    conv_m->get_window_movement_strides(),
...
...
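The grouped variant follows the same batch-norm-folding derivation as the hunk above it, except that GroupConvolution carries no pre-existing bias; with b = 0 the folded bias reduces to

\[ \beta \;-\; \mu\,\frac{\gamma}{\sqrt{\sigma^2 + \varepsilon}}, \]

matching the Subtract(beta, mean_gamma) node built here and the "(-mean) * gamma / sqrt(variance + epsilon) + beta" comment.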
@@ -1738,7 +1782,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::
     auto bias = std::make_shared<pattern::op::Label>(element::f32, shape_bias);
     auto num = std::make_shared<pattern::op::Label>(element::f32, shape_num);
-    auto conv = std::make_shared<op::GroupConvolutionBias>(input,
+    auto conv = std::make_shared<ngraph::op::GroupConvolutionBias>(input,
                                                            filters,
                                                            bias,
                                                            Strides{1, 1},
...
...
@@ -1753,7 +1797,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::
     auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
     // GroupConv + BatchNorm + Relu -> GroupConvBias
-    auto prelu = std::make_shared<op::Relu>(conv_label);
+    auto prelu = std::make_shared<ngraph::op::Relu>(conv_label);
     ngraph::pattern::graph_rewrite_callback callback =
         [input, filters, bias, num, conv_label, prelu](pattern::Matcher& m) {
...
...
@@ -1763,11 +1807,11 @@ void ngraph::runtime::cpu::pass::CPUFusion::
             auto pattern_map = m.get_pattern_map();
             auto conv_m =
-                std::static_pointer_cast<op::GroupConvolutionBias>(pattern_map[conv_label]);
-            auto relu_m = std::dynamic_pointer_cast<op::Relu>(m.get_match_root());
+                std::static_pointer_cast<ngraph::op::GroupConvolutionBias>(pattern_map[conv_label]);
+            auto relu_m = std::dynamic_pointer_cast<ngraph::op::Relu>(m.get_match_root());
-            auto g_conv_bias_relu = std::make_shared<op::GroupConvolutionBias>(conv_m->get_argument(0),
+            auto g_conv_bias_relu =
+                std::make_shared<ngraph::op::GroupConvolutionBias>(conv_m->get_argument(0),
                                                                    conv_m->get_argument(1),
                                                                    conv_m->get_argument(2),
                                                                    conv_m->get_window_movement_strides(),
...
...
@@ -1796,25 +1840,26 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fuse_lstm_recurrent_state(
     auto bias_label = std::make_shared<pattern::op::Label>(element::f32, Shape{400});
     ngraph::runtime::cpu::rnn_utils::rnntype rnn_type =
         ngraph::runtime::cpu::rnn_utils::rnntype::vanilla_lstm;
-    auto lstm1 = std::make_shared<op::Lstm>(src_layer_label,
+    auto lstm1 = std::make_shared<ngraph::op::Lstm>(src_layer_label,
                                             src_iter_label,
                                             weights_layer_label,
                                             weights_iter_label,
                                             bias_label,
                                             rnn_type);
-    auto lstm1_goe0 = std::make_shared<op::GetOutputElement>(lstm1, 0);
-    auto lstm1_goe1 = std::make_shared<op::GetOutputElement>(lstm1, 1);
+    auto lstm1_goe0 = std::make_shared<ngraph::op::GetOutputElement>(lstm1, 0);
+    auto lstm1_goe1 = std::make_shared<ngraph::op::GetOutputElement>(lstm1, 1);
     auto lstm1_goe0_label =
         std::make_shared<pattern::op::Label>(lstm1_goe0, nullptr, NodeVector{lstm1_goe0});
     auto lstm1_goe1_label =
         std::make_shared<pattern::op::Label>(lstm1_goe1, nullptr, NodeVector{lstm1_goe1});
-    auto lstm1_goe0_slice =
-        std::make_shared<op::Slice>(lstm1_goe0_label, Coordinate{0, 0}, Coordinate{10, 100});
-    auto lstm1_goe1_slice =
-        std::make_shared<op::Slice>(lstm1_goe1_label, Coordinate{10, 0}, Coordinate{20, 100});
+    auto lstm1_goe0_slice = std::make_shared<ngraph::op::Slice>(
+        lstm1_goe0_label, Coordinate{0, 0}, Coordinate{10, 100});
+    auto lstm1_goe1_slice = std::make_shared<ngraph::op::Slice>(
+        lstm1_goe1_label, Coordinate{10, 0}, Coordinate{20, 100});
-    auto concat = std::make_shared<op::Concat>(NodeVector{lstm1_goe0_slice, lstm1_goe1_slice}, 0);
+    auto concat =
+        std::make_shared<ngraph::op::Concat>(NodeVector{lstm1_goe0_slice, lstm1_goe1_slice}, 0);
     auto concat_label = std::make_shared<pattern::op::Label>(concat, nullptr, NodeVector{concat});
     ngraph::pattern::graph_rewrite_callback callback =
...
...
@@ -1841,19 +1886,20 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
     Shape shape_b{1, 32, 2};
     auto input = std::make_shared<pattern::op::Label>(element::f32, shape_a);
-    auto slice = std::make_shared<op::Slice>(input, Coordinate{1, 0, 0}, Coordinate{2, 32, 2});
+    auto slice =
+        std::make_shared<ngraph::op::Slice>(input, Coordinate{1, 0, 0}, Coordinate{2, 32, 2});
     auto slice_label = std::make_shared<pattern::op::Label>(slice, nullptr, NodeVector{slice});
     auto update_input = std::make_shared<pattern::op::Label>(element::f32, shape_b);
-    auto update = std::make_shared<op::Add>(update_input, slice_label);
-    auto replace_slice = std::make_shared<op::ReplaceSlice>(
+    auto update = std::make_shared<ngraph::op::Add>(update_input, slice_label);
+    auto replace_slice = std::make_shared<ngraph::op::ReplaceSlice>(
         input, update, Coordinate{1, 0, 0}, Coordinate{2, 32, 2});
     ngraph::pattern::graph_rewrite_callback callback =
         [input, update_input, slice_label](pattern::Matcher& m) {
             NGRAPH_DEBUG << "In callback for update_slice = " << m.get_match_root()->get_name();
             auto pattern_map = m.get_pattern_map();
-            auto slice_m = std::static_pointer_cast<op::Slice>(pattern_map[slice_label]);
-            auto replace_m = std::static_pointer_cast<op::ReplaceSlice>(m.get_match_root());
+            auto slice_m = std::static_pointer_cast<ngraph::op::Slice>(pattern_map[slice_label]);
+            auto replace_m =
+                std::static_pointer_cast<ngraph::op::ReplaceSlice>(m.get_match_root());
             if (replace_m->get_lower_bounds() != slice_m->get_lower_bounds() ||
                 replace_m->get_upper_bounds() != slice_m->get_upper_bounds() ||
                 replace_m->get_strides() != slice_m->get_strides())
...
...
@@ -1869,7 +1915,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
                 return false;
             }
-            auto update_slice = std::make_shared<op::UpdateSlice>(pattern_map[input],
+            auto update_slice = std::make_shared<ngraph::op::UpdateSlice>(pattern_map[input],
                                                                   pattern_map[update_input],
                                                                   replace_m->get_lower_bounds(),
                                                                   replace_m->get_upper_bounds(),
...
...
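Informally, the rewrite above recognizes an in-place accumulation: when the Add's other operand is a Slice of the same tensor with identical bounds and strides (the equality checks in the callback), then

\[ \mathrm{ReplaceSlice}\big(x,\ x[l{:}u] + \Delta,\ l,\ u\big) \;\equiv\; x \text{ with } x[l{:}u] \mathrel{+}= \Delta, \]

which is what the single ngraph::op::UpdateSlice node constructed from (input, update, bounds) expresses, avoiding a full copy of x.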
@@ -1897,7 +1943,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
     if (with_bias)
     {
         auto bias = std::make_shared<pattern::op::Label>(element::i32, Shape{shape[0]});
-        qconv = std::make_shared<op::QuantizedConvolutionBias>(data_batch,
+        qconv = std::make_shared<ngraph::op::QuantizedConvolutionBias>(data_batch,
                                                                filters,
                                                                bias,
                                                                Strides{1, 1},
...
...
@@ -1910,7 +1956,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
     }
     else
     {
-        qconv = std::make_shared<op::QuantizedConvolution>(data_batch,
+        qconv = std::make_shared<ngraph::op::QuantizedConvolution>(data_batch,
                                                            filters,
                                                            Strides{1, 1},
                                                            Strides{1, 1},
...
...
@@ -1919,14 +1965,16 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
                                                            Strides{1, 1},
                                                            requantization_scale);
     }
-    auto dq = std::make_shared<op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{});
-    auto relu = std::make_shared<op::Relu>(dq);
+    auto dq =
+        std::make_shared<ngraph::op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{});
+    auto relu = std::make_shared<ngraph::op::Relu>(dq);
     pattern::graph_rewrite_callback callback = [with_bias](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_qconv_relu against "
                      << m.get_match_root()->get_name();
-        auto dq_m = std::static_pointer_cast<op::Dequantize>(m.get_match_root()->get_argument(0));
+        auto dq_m =
+            std::static_pointer_cast<ngraph::op::Dequantize>(m.get_match_root()->get_argument(0));
         if (!(ngraph::is_zero(dq_m->get_argument(2))))
         {
...
...
@@ -1942,7 +1990,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
         if (!with_bias)
         {
-            if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<op::QuantizedConvolution>(
+            if (!runtime::cpu::mkldnn_utils::can_use_mkldnn_conv<ngraph::op::QuantizedConvolution>(
                     dq_m->get_argument(0).get()))
             {
                 NGRAPH_DEBUG << "Quantized Convolution not supported by MKLDNN";
...
...
@@ -1953,9 +2001,9 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
         std::shared_ptr<ngraph::op::Op> qconv_n;
         if (with_bias)
         {
-            auto qconv_m =
-                std::static_pointer_cast<op::QuantizedConvolutionBias>(dq_m->get_argument(0));
-            qconv_n = std::make_shared<op::QuantizedConvolutionBias>(
+            auto qconv_m =
+                std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_m->get_argument(0));
+            qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionBias>(
                 qconv_m->get_argument(0),
                 qconv_m->get_argument(1),
                 qconv_m->get_argument(2),
...
...
@@ -1970,8 +2018,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
         else
         {
-            auto qconv_m = std::static_pointer_cast<op::QuantizedConvolution>(dq_m->get_argument(0));
-            qconv_n = std::make_shared<op::QuantizedConvolutionRelu>(
+            auto qconv_m =
+                std::static_pointer_cast<ngraph::op::QuantizedConvolution>(dq_m->get_argument(0));
+            qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionRelu>(
                 qconv_m->get_argument(0),
                 qconv_m->get_argument(1),
                 qconv_m->get_window_movement_strides(),
...
...
@@ -1983,7 +2031,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
         }
         auto zp =
             builder::make_constant<uint8_t>(element::u8, dq_m->get_argument(1)->get_shape(), 0);
-        auto dq_n = std::make_shared<op::Dequantize>(
+        auto dq_n = std::make_shared<ngraph::op::Dequantize>(
            qconv_n, dq_m->get_argument(1), zp, dq_m->get_output_element_type(0), dq_m->get_axes());
         ngraph::replace_node(m.get_match_root(), dq_n);
         return true;
...
...
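Why folding the ReLU below the Dequantize is legal here: the callback first checks that the dequantize zero point is zero (the ngraph::is_zero test on argument 2), so dequantization is a pure positive scaling q ↦ s·q, and such scaling commutes with ReLU,

\[ \mathrm{ReLU}(s \cdot q) \;=\; s \cdot \mathrm{ReLU}(q), \qquad s > 0, \]

assuming a positive scale, as is standard in quantization. The ReLU can therefore be absorbed into the quantized convolution (QuantizedConvolutionRelu in the no-bias branch; the bias branch presumably enables an equivalent with-relu mode via the constructor arguments elided in this view), and the same Dequantize is re-emitted on top with a fresh zero zero point.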
@@ -2008,24 +2056,25 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qavg_pool()
     auto input = std::make_shared<pattern::op::Label>(element::i8, shape);
     auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
     auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
-    auto dq = std::make_shared<op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
-    auto avg_pool = std::make_shared<op::AvgPool>(dq, Shape{1, 1});
+    auto dq =
+        std::make_shared<ngraph::op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
+    auto avg_pool = std::make_shared<ngraph::op::AvgPool>(dq, Shape{1, 1});
     pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_qavg_pool against "
                      << m.get_match_root()->get_name();
-        auto avg_pool_m = std::static_pointer_cast<op::AvgPool>(m.get_match_root());
-        auto dq_m = std::static_pointer_cast<op::Dequantize>(avg_pool_m->get_argument(0));
+        auto avg_pool_m = std::static_pointer_cast<ngraph::op::AvgPool>(m.get_match_root());
+        auto dq_m = std::static_pointer_cast<ngraph::op::Dequantize>(avg_pool_m->get_argument(0));
-        auto qavg_pool_n = std::make_shared<op::QuantizedAvgPool>(
+        auto qavg_pool_n = std::make_shared<ngraph::op::QuantizedAvgPool>(
             dq_m->get_argument(0),
             avg_pool_m->get_window_shape(),
             avg_pool_m->get_window_movement_strides(),
             avg_pool_m->get_padding_below(),
             avg_pool_m->get_padding_above(),
             avg_pool_m->get_include_padding_in_avg_computation());
-        auto dq_n = std::make_shared<op::Dequantize>(qavg_pool_n,
+        auto dq_n = std::make_shared<ngraph::op::Dequantize>(qavg_pool_n,
                                                      dq_m->get_argument(1),
                                                      dq_m->get_argument(2),
                                                      dq_m->get_output_element_type(0),
...
...
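The pooling rewrites rest on a commutation argument similar to the ReLU case: with a per-tensor scale s > 0 and the zero point carried through unchanged, both pooling reductions commute with dequantization,

\[ \max_i\,(s\,q_i) = s \max_i q_i, \qquad \frac{1}{n}\sum_i s\,q_i = s \cdot \frac{1}{n}\sum_i q_i, \]

so the pool can run directly on the int8 tensor and the original scale/zero-point Dequantize is simply re-applied to the pooled result — which is why dq_n above reuses dq_m's scale and zero-point arguments verbatim. The construct_qmax_pool pass that follows is the max-pooling instance of the same idea.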
@@ -2045,23 +2094,24 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qmax_pool()
     auto input = std::make_shared<pattern::op::Label>(element::i8, shape);
     auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
     auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
-    auto dq = std::make_shared<op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
-    auto max_pool = std::make_shared<op::MaxPool>(dq, Shape{1, 1});
+    auto dq =
+        std::make_shared<ngraph::op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
+    auto max_pool = std::make_shared<ngraph::op::MaxPool>(dq, Shape{1, 1});
     pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_qmax_pool against "
                      << m.get_match_root()->get_name();
-        auto max_pool_m = std::static_pointer_cast<op::MaxPool>(m.get_match_root());
-        auto dq_m = std::static_pointer_cast<op::Dequantize>(max_pool_m->get_argument(0));
+        auto max_pool_m = std::static_pointer_cast<ngraph::op::MaxPool>(m.get_match_root());
+        auto dq_m = std::static_pointer_cast<ngraph::op::Dequantize>(max_pool_m->get_argument(0));
-        auto qmax_pool_n = std::make_shared<op::QuantizedMaxPool>(dq_m->get_argument(0),
+        auto qmax_pool_n = std::make_shared<ngraph::op::QuantizedMaxPool>(dq_m->get_argument(0),
                                                                   max_pool_m->get_window_shape(),
                                                                   max_pool_m->get_window_movement_strides(),
                                                                   max_pool_m->get_padding_below(),
                                                                   max_pool_m->get_padding_above());
-        auto dq_n = std::make_shared<op::Dequantize>(qmax_pool_n,
+        auto dq_n = std::make_shared<ngraph::op::Dequantize>(qmax_pool_n,
                                                      dq_m->get_argument(1),
                                                      dq_m->get_argument(2),
                                                      dq_m->get_output_element_type(0),
...
...
@@ -2086,15 +2136,15 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconcat()
     for (size_t i = 0; i < 5; i++)
     {
         inputs.push_back(std::make_shared<pattern::op::Label>(element::f32, shape));
-        concats.push_back(std::make_shared<op::Concat>(inputs, 0));
+        concats.push_back(std::make_shared<ngraph::op::Concat>(inputs, 0));
     }
     pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_qconcat against "
                      << m.get_match_root()->get_name();
-        auto concat_m = std::static_pointer_cast<op::Concat>(m.get_match_root());
-        auto dq_m = std::static_pointer_cast<op::Dequantize>(concat_m->get_argument(0));
+        auto concat_m = std::static_pointer_cast<ngraph::op::Concat>(m.get_match_root());
+        auto dq_m = std::static_pointer_cast<ngraph::op::Dequantize>(concat_m->get_argument(0));
         NodeVector new_args;
         for (auto arg : concat_m->get_arguments())
         {
...
...
@@ -2112,9 +2162,9 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconcat()
             new_args.push_back(arg->get_argument(0));
         }
-        auto concat_n = std::make_shared<op::QuantizedConcat>(new_args, concat_m->get_concatenation_axis());
-        auto dq_n = std::make_shared<op::Dequantize>(concat_n,
+        auto concat_n =
+            std::make_shared<ngraph::op::QuantizedConcat>(new_args, concat_m->get_concatenation_axis());
+        auto dq_n = std::make_shared<ngraph::op::Dequantize>(concat_n,
                                                      dq_m->get_argument(1),
                                                      dq_m->get_argument(2),
                                                      dq_m->get_element_type(),
...
...
@@ -2141,16 +2191,19 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_dq_q()
     auto q_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
     auto q_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
-    auto dq = std::make_shared<op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
-    op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
-    auto q = std::make_shared<op::Quantize>(dq, q_scale, q_zp, element::i8, AxisSet{}, round_mode);
+    auto dq =
+        std::make_shared<ngraph::op::Dequantize>(input, dq_scale, dq_zp, element::f32, AxisSet{});
+    ngraph::op::Quantize::RoundMode round_mode =
+        ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
+    auto q =
+        std::make_shared<ngraph::op::Quantize>(dq, q_scale, q_zp, element::i8, AxisSet{}, round_mode);
     pattern::graph_rewrite_callback callback = [input](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_dq_q against "
                      << m.get_match_root()->get_name();
-        auto q_m = std::static_pointer_cast<op::Quantize>(m.get_match_root());
-        auto dq_m = std::static_pointer_cast<op::Dequantize>(q_m->get_argument(0));
+        auto q_m = std::static_pointer_cast<ngraph::op::Quantize>(m.get_match_root());
+        auto dq_m = std::static_pointer_cast<ngraph::op::Dequantize>(q_m->get_argument(0));
         if (!(ngraph::is_zero(q_m->get_argument(2)) && ngraph::is_zero(dq_m->get_argument(2))))
         {
             NGRAPH_DEBUG << "Non-zero zero points";
...
...
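Informally, the Dequantize → Quantize pair this pass targets computes, per element,

\[ q' \;=\; \mathrm{round}\!\left(\frac{s_{dq}}{s_q}\, q\right), \]

so once both zero points are verified to be zero (the is_zero checks above) and the two scales agree — a condition presumably enforced in the elided remainder of the callback — the pair is the identity on q and can be removed from the graph.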
@@ -2195,7 +2248,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
     auto dq_zp2 = std::make_shared<pattern::op::Label>(element::i8, Shape{});
     // Left Graph
-    auto qconvb = std::make_shared<op::QuantizedConvolutionBias>(data_batch,
+    auto qconvb = std::make_shared<ngraph::op::QuantizedConvolutionBias>(data_batch,
                                                                  filters,
                                                                  bias,
                                                                  Strides{1, 1},
...
...
@@ -2206,8 +2259,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
                                                                  requantization_scale,
                                                                  false);
     auto qconvb_label = std::make_shared<pattern::op::Label>(qconvb, nullptr, NodeVector{qconvb});
-    auto dq_l = std::make_shared<op::Dequantize>(qconvb_label, dq_scale1, dq_zp1, element::f32, AxisSet{});
+    auto dq_l = std::make_shared<ngraph::op::Dequantize>(qconvb_label, dq_scale1, dq_zp1, element::f32, AxisSet{});
     auto dq_l_label = std::make_shared<pattern::op::Label>(dq_l, nullptr, NodeVector{dq_l});
     auto skipr_l = std::make_shared<pattern::op::Skip>(
         dq_l_label, [](std::shared_ptr<Node> n) { return n->description() == "Reshape"; });
...
...
@@ -2216,8 +2269,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
     //Right Graph
     auto summand = std::make_shared<pattern::op::Label>(element::i8, qconvb->get_shape());
-    auto dq_r = std::make_shared<op::Dequantize>(summand, dq_scale2, dq_zp2, element::f32, AxisSet{});
+    auto dq_r = std::make_shared<ngraph::op::Dequantize>(summand, dq_scale2, dq_zp2, element::f32, AxisSet{});
     auto dq_r_label = std::make_shared<pattern::op::Label>(dq_r, nullptr, NodeVector{dq_r});
     auto skipr_r = std::make_shared<pattern::op::Skip>(
         dq_r_label, [](std::shared_ptr<Node> n) { return n->description() == "Reshape"; });
...
...
@@ -2226,17 +2279,18 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
     //Add left + right
     auto add = skipb_l + skipb_r;
-    auto prelu = std::make_shared<op::Relu>(add);
+    auto prelu = std::make_shared<ngraph::op::Relu>(add);
     pattern::graph_rewrite_callback callback = [dq_l_label, dq_r_label](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_qconvb_dq_add_relu against "
                      << m.get_match_root()->get_name();
         auto pattern_map = m.get_pattern_map();
-        auto add_m = std::dynamic_pointer_cast<op::Add>(m.get_match_root()->get_argument(0));
-        auto dq_l_m = std::dynamic_pointer_cast<op::Dequantize>(pattern_map[dq_l_label]);
-        auto dq_r_m = std::dynamic_pointer_cast<op::Dequantize>(pattern_map[dq_r_label]);
+        auto add_m =
+            std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
+        auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
+        auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);
         auto qconv =
-            std::static_pointer_cast<op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
+            std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
         auto inplace_input = dq_r_m->get_argument(0);
         if (!(ngraph::is_zero(dq_l_m->get_argument(2)) && ngraph::is_zero(dq_r_m->get_argument(2))))
...
...
@@ -2289,8 +2343,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
         if (dq_r_m->get_input_element_type(2) == element::i8)
         {
             // TODO (jbobba): Investigate the need for Convert op
-            qconvba = std::make_shared<op::Convert>(
-                std::make_shared<op::QuantizedConvolutionBiasSignedAdd>(
+            qconvba = std::make_shared<ngraph::op::Convert>(
+                std::make_shared<ngraph::op::QuantizedConvolutionBiasSignedAdd>(
                     qconv->get_argument(0),
                     qconv->get_argument(1),
                     qconv->get_argument(2),
...
...
@@ -2307,7 +2361,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
         }
         else
         {
-            qconvba = std::make_shared<op::QuantizedConvolutionBiasAdd>(
+            qconvba = std::make_shared<ngraph::op::QuantizedConvolutionBiasAdd>(
                 qconv->get_argument(0),
                 qconv->get_argument(1),
                 qconv->get_argument(2),
...
...
@@ -2321,9 +2375,9 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
                 sum_scale,
                 true);
         }
-        auto zp = op::Constant::create(element::u8, Shape{}, {0});
-        auto DQ = std::make_shared<op::Dequantize>(qconvba, dq_l_scale, zp, element::f32, AxisSet{});
+        auto zp = ngraph::op::Constant::create(element::u8, Shape{}, {0});
+        auto DQ = std::make_shared<ngraph::op::Dequantize>(qconvba, dq_l_scale, zp, element::f32, AxisSet{});
         ngraph::replace_node(m.get_match_root(), DQ);
         return true;
...
...
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.cpp  View file @ b466027e
...
...
@@ -64,7 +64,7 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
     {
         if (n->description() == "Concat")
         {
-            auto concat = std::static_pointer_cast<op::Concat>(n);
+            auto concat = std::static_pointer_cast<ngraph::op::Concat>(n);
             auto shape = concat->get_input_shape(0);
             auto axis = concat->get_concatenation_axis();
             auto product = 1;
...
...
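This file shows the second flavor of the ambiguity: a local variable is itself named op, which hides the op namespace for the rest of the block. A minimal C++ sketch (hypothetical namespace and types, not this repository's headers):

#include <memory>

namespace fake_ngraph { namespace op { struct Op { virtual ~Op() = default; }; } }

void visit(const std::shared_ptr<fake_ngraph::op::Op>& arg)
{
    // The variable 'op' shadows the namespace 'op' from its point of
    // declaration onward, so the cast's template argument must be
    // spelled fake_ngraph::op::Op explicitly, as the hunks below do.
    auto op = std::static_pointer_cast<fake_ngraph::op::Op>(arg);
    (void)op;
}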
@@ -134,7 +134,7 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
             {
                 if (arg->is_op())
                 {
-                    auto op = std::static_pointer_cast<op::Op>(arg);
+                    auto op = std::static_pointer_cast<ngraph::op::Op>(arg);
                     auto annotation = op->get_op_annotations();
                     if (annotation && annotation->get_in_place_oi_pairs().size() > 0)
...
...
@@ -177,7 +177,7 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
             {
                 if (user->is_op())
                 {
-                    auto op = std::static_pointer_cast<op::Op>(user);
+                    auto op = std::static_pointer_cast<ngraph::op::Op>(user);
                     if (auto op_annotations = op->get_op_annotations())
                     {
                         if (op_annotations->get_in_place_oi_pairs().size() > 0)
...
...
@@ -227,7 +227,7 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
     {
         if (n->description() == "Slice")
         {
-            auto slice = std::static_pointer_cast<op::Slice>(n);
+            auto slice = std::static_pointer_cast<ngraph::op::Slice>(n);
             auto in_shape = slice->get_input_shape(0);
             auto out_shape = slice->get_output_shape(0);
             auto strides = slice->get_strides();
...
...
src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp  View file @ b466027e
...
...
@@ -66,15 +66,16 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_sigmoid()
 {
     // construct variance
     auto input = std::make_shared<pattern::op::Label>(element::f32, Shape{3, 4});
-    auto neg_input = std::make_shared<op::Negative>(input);
-    auto exp_neg_input = std::make_shared<op::Exp>(neg_input);
+    auto neg_input = std::make_shared<ngraph::op::Negative>(input);
+    auto exp_neg_input = std::make_shared<ngraph::op::Exp>(neg_input);
     // broadcast input
     auto constant = std::make_shared<pattern::op::Label>(element::f32, Shape{});
-    auto broadcast_constant = std::make_shared<op::Broadcast>(constant, Shape{3, 4}, AxisSet{0, 1});
+    auto broadcast_constant =
+        std::make_shared<ngraph::op::Broadcast>(constant, Shape{3, 4}, AxisSet{0, 1});
-    auto add_exp = std::make_shared<op::Add>(exp_neg_input, broadcast_constant);
-    auto divide_1_over_exp = std::make_shared<op::Divide>(broadcast_constant, add_exp);
+    auto add_exp = std::make_shared<ngraph::op::Add>(exp_neg_input, broadcast_constant);
+    auto divide_1_over_exp = std::make_shared<ngraph::op::Divide>(broadcast_constant, add_exp);
     // Define a call back that needs to called once the DFG matches the pattern
     ngraph::pattern::graph_rewrite_callback callback = [input](pattern::Matcher& m) {
...
...
@@ -96,7 +97,7 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_sigmoid()
             return false;
         }
-        auto sigmoid_node = std::make_shared<op::Sigmoid>(pattern_map[input]);
+        auto sigmoid_node = std::make_shared<ngraph::op::Sigmoid>(pattern_map[input]);
         ngraph::replace_node(m.get_match_root(), sigmoid_node);
         return true;
     };
...
...
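What this pass recognizes: the explicit Negative → Exp → Add(broadcast c) → Divide subgraph computes, when the broadcast constant c is 1,

\[ \frac{1}{1 + e^{-x}} \;=\; \sigma(x), \]

the logistic sigmoid, which the callback then collapses into the single ngraph::op::Sigmoid node. (Verifying that the matched constant really is 1 would happen in the elided portion of the callback.)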
@@ -147,43 +148,50 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
     auto ct_1 = std::make_shared<pattern::op::Label>(element::f32, Shape{10, 100});
     auto broadcast_pred = [](std::shared_ptr<Node> n) {
-        return ((std::dynamic_pointer_cast<op::Broadcast>(n) != nullptr) ||
-                (std::dynamic_pointer_cast<op::Reshape>(n) != nullptr));
+        return ((std::dynamic_pointer_cast<ngraph::op::Broadcast>(n) != nullptr) ||
+                (std::dynamic_pointer_cast<ngraph::op::Reshape>(n) != nullptr));
     };
     // Fused MatMuls
     // (W_{ii} | (W_{if} | W_{ig} | W_{io}) * x_t + (b_{ii} | b_{if} | b_{ig} | b_{io})
-    auto dot1 = std::make_shared<op::Dot>(xt, w_i2h);
-    auto add1 = std::make_shared<op::Add>(
+    auto dot1 = std::make_shared<ngraph::op::Dot>(xt, w_i2h);
+    auto add1 = std::make_shared<ngraph::op::Add>(
         dot1, std::make_shared<pattern::op::Skip>(bias_i2h, broadcast_pred));
     // (W_{hi} | (W_{hf} | W_{hg} | W_{ho}) * h_{(t-1)} + (b_{hi} | b_{hf} | b_{hg} | b_{ho})
-    auto dot2 = std::make_shared<op::Dot>(ht_1, w_h2h);
-    auto add2 = std::make_shared<op::Add>(
+    auto dot2 = std::make_shared<ngraph::op::Dot>(ht_1, w_h2h);
+    auto add2 = std::make_shared<ngraph::op::Add>(
         dot2, std::make_shared<pattern::op::Skip>(bias_h2h, broadcast_pred));
-    auto X = std::make_shared<op::Add>(add2, add1);
+    auto X = std::make_shared<ngraph::op::Add>(add2, add1);
     // construct gates
-    auto it = std::make_shared<op::Sigmoid>(
-        std::make_shared<op::Slice>(X, Coordinate{0, 0}, Coordinate{10, 100}));
-    auto ft = std::make_shared<op::Sigmoid>(
-        std::make_shared<op::Slice>(X, Coordinate{0, 100}, Coordinate{10, 200}));
-    auto gt = std::make_shared<op::Tanh>(
-        std::make_shared<op::Slice>(X, Coordinate{0, 200}, Coordinate{10, 300}));
-    auto ot = std::make_shared<op::Sigmoid>(
-        std::make_shared<op::Slice>(X, Coordinate{0, 300}, Coordinate{10, 400}));
+    auto it = std::make_shared<ngraph::op::Sigmoid>(
+        std::make_shared<ngraph::op::Slice>(X, Coordinate{0, 0}, Coordinate{10, 100}));
+    auto ft = std::make_shared<ngraph::op::Sigmoid>(
+        std::make_shared<ngraph::op::Slice>(X, Coordinate{0, 100}, Coordinate{10, 200}));
+    auto gt = std::make_shared<ngraph::op::Tanh>(
+        std::make_shared<ngraph::op::Slice>(X, Coordinate{0, 200}, Coordinate{10, 300}));
+    auto ot = std::make_shared<ngraph::op::Sigmoid>(
+        std::make_shared<ngraph::op::Slice>(X, Coordinate{0, 300}, Coordinate{10, 400}));
     // construct (c_t) cell state
-    auto ct = std::make_shared<op::Add>(std::make_shared<op::Multiply>(ft, ct_1),
-                                        std::make_shared<op::Multiply>(it, gt));
+    auto ct = std::make_shared<ngraph::op::Add>(std::make_shared<ngraph::op::Multiply>(ft, ct_1),
+                                                std::make_shared<ngraph::op::Multiply>(it, gt));
     auto ct_label = std::make_shared<pattern::op::Label>(ct, nullptr, NodeVector{ct});
     // construct (h_t)
-    auto ht = std::make_shared<op::Multiply>(ot, std::make_shared<op::Tanh>(ct_label));
+    auto ht =
+        std::make_shared<ngraph::op::Multiply>(ot, std::make_shared<ngraph::op::Tanh>(ct_label));
     // Define a call back that needs to called once the DFG matches the pattern
-    pattern::graph_rewrite_callback callback = [ct_label, w_i2h, bias_i2h, w_h2h, bias_h2h, xt, ht_1, ct_1](pattern::Matcher& m) {
+    pattern::graph_rewrite_callback callback =
+        [ct_label, w_i2h, bias_i2h, w_h2h, bias_h2h, xt, ht_1, ct_1](pattern::Matcher& m) {
         NGRAPH_DEBUG << "In a callback for construct_fprop_lstm pattern against "
                      << m.get_match_root()->get_name();
...
...
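The pattern above is a direct transcription of the textbook LSTM cell. With X = (x_t W_{i2h} + b_{i2h}) + (h_{t-1} W_{h2h} + b_{h2h}) and the four 100-column slices of X feeding the gates, in LaTeX:

\[
\begin{aligned}
i_t &= \sigma\big(X_{[:,\,0:100]}\big), &
f_t &= \sigma\big(X_{[:,\,100:200]}\big), \\
g_t &= \tanh\big(X_{[:,\,200:300]}\big), &
o_t &= \sigma\big(X_{[:,\,300:400]}\big), \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t, &
h_t &= o_t \odot \tanh(c_t),
\end{aligned}
\]

where \(\odot\) denotes elementwise multiplication. The concrete sizes (batch 10, hidden 100, hence the 400-column fused gate matrix) are the representative shapes used to build the pattern labels above.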
@@ -213,10 +221,10 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
         // this check avoids fusing of LSTM cells if it is part of a decoder; we
         // will remove this once mkldnn optimizes individual LSTM cell or once
         // we have decoder pattern for GNMT.
-        if (!(std::dynamic_pointer_cast<op::Broadcast>(cell_state) &&
-              std::dynamic_pointer_cast<op::Constant>(cell_state->get_argument(0))) &&
-            !(std::dynamic_pointer_cast<op::Slice>(cell_state) &&
-              std::dynamic_pointer_cast<op::GetOutputElement>(cell_state->get_argument(0))))
+        if (!(std::dynamic_pointer_cast<ngraph::op::Broadcast>(cell_state) &&
+              std::dynamic_pointer_cast<ngraph::op::Constant>(cell_state->get_argument(0))) &&
+            !(std::dynamic_pointer_cast<ngraph::op::Slice>(cell_state) &&
+              std::dynamic_pointer_cast<ngraph::op::GetOutputElement>(cell_state->get_argument(0))))
         {
             return false;
         }
...
...
@@ -232,8 +240,8 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
         // pattern matcher cannot guarantee this since the computations are
         // symmetric around x_t and ht_1. Use heuristics to swap the matched
         // labels
-        if (std::dynamic_pointer_cast<op::Broadcast>(src_layer) &&
-            std::dynamic_pointer_cast<op::Constant>(src_layer->get_argument(0)))
+        if (std::dynamic_pointer_cast<ngraph::op::Broadcast>(src_layer) &&
+            std::dynamic_pointer_cast<ngraph::op::Constant>(src_layer->get_argument(0)))
         {
             // First timestep of an RNN layer
             swap_lstm_inputs();
...
...
@@ -242,7 +250,8 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
         {
             swap_lstm_inputs();
         }
-        else if (std::dynamic_pointer_cast<op::GetOutputElement>(cell_state->get_argument(0)))
+        else if (std::dynamic_pointer_cast<ngraph::op::GetOutputElement>(
+                     cell_state->get_argument(0)))
         {
             // swap the inputs if the cell_state and hidden state does not
             // belong to the same Lstm
...
...
@@ -256,8 +265,7 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
         if (hidden_state->get_shape() != cell_state->get_shape())
         {
-            NGRAPH_DEBUG
-                << "Lstm MKLDNN kernel requires recurrent output hidden states to match ";
+            NGRAPH_DEBUG << "Lstm MKLDNN kernel requires recurrent output hidden states to match ";
             return false;
         }
...
...
@@ -281,26 +289,26 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
         }
         std::shared_ptr<Node> src_iter =
-            std::make_shared<op::Concat>(NodeVector{hidden_state, cell_state}, 0);
+            std::make_shared<ngraph::op::Concat>(NodeVector{hidden_state, cell_state}, 0);
         if (src_layer->get_shape()[1] != slc || src_iter->get_shape()[1] != sic)
         {
             NGRAPH_DEBUG << "Feature size mismatch between weights and input tensors";
             return false;
         }
-        auto bias = std::make_shared<op::Add>(pattern_map[bias_i2h], pattern_map[bias_h2h]);
-        auto lstm_node = std::make_shared<op::Lstm>(
+        auto bias = std::make_shared<ngraph::op::Add>(pattern_map[bias_i2h], pattern_map[bias_h2h]);
+        auto lstm_node = std::make_shared<ngraph::op::Lstm>(
             src_layer, src_iter, weights_layer, weights_iter, bias, rnn_type);
-        auto lstm_ht_output = std::make_shared<op::GetOutputElement>(lstm_node, 0);
-        auto lstm_ht_ct_output = std::make_shared<op::GetOutputElement>(lstm_node, 1);
+        auto lstm_ht_output = std::make_shared<ngraph::op::GetOutputElement>(lstm_node, 0);
+        auto lstm_ht_ct_output = std::make_shared<ngraph::op::GetOutputElement>(lstm_node, 1);
         // dst_iter of lstm mkldnn output holds the results of both recurrent state
         // tensor outputs. we need to slice the ct.
-        auto ht_slice = std::make_shared<op::Slice>(
+        auto ht_slice = std::make_shared<ngraph::op::Slice>(
             lstm_ht_output, Coordinate{0, 0}, Coordinate{batch_size, dlc});
-        auto ct_slice = std::make_shared<op::Slice>(
+        auto ct_slice = std::make_shared<ngraph::op::Slice>(
             lstm_ht_ct_output, Coordinate{batch_size, 0}, Coordinate{(2 * batch_size), dic});
         if (lstm_node->get_outputs().at(0).get_inputs().size() != 2)
...
...
@@ -330,44 +338,45 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
     auto lstm_ht = std::make_shared<pattern::op::Label>(element::f32, Shape{10, 100});
     auto lstm_ct = std::make_shared<pattern::op::Label>(element::f32, Shape{10, 100});
-    auto lstm_src_iter = std::make_shared<op::Concat>(NodeVector{lstm_ht, lstm_ct}, 0);
+    auto lstm_src_iter = std::make_shared<ngraph::op::Concat>(NodeVector{lstm_ht, lstm_ct}, 0);
     auto lstm_src_iter_label =
         std::make_shared<pattern::op::Label>(lstm_src_iter, nullptr, NodeVector{lstm_src_iter});
     auto lstm_weights_layer_shared = std::make_shared<pattern::op::Label>(
-        element::f32, Shape{400, 100}, pattern::has_class<op::Parameter>());
-    auto lstm_weights_layer = std::make_shared<op::Reshape>(
+        element::f32, Shape{400, 100}, pattern::has_class<ngraph::op::Parameter>());
+    auto lstm_weights_layer = std::make_shared<ngraph::op::Reshape>(
         lstm_weights_layer_shared, AxisVector{1, 0}, Shape{100, 400});
     auto lstm_weights_layer_label = std::make_shared<pattern::op::Label>(
         lstm_weights_layer, nullptr, NodeVector{lstm_weights_layer});
     auto lstm_weights_iter_shared = std::make_shared<pattern::op::Label>(
-        element::f32, Shape{400, 100}, pattern::has_class<op::Parameter>());
-    auto lstm_weights_iter = std::make_shared<op::Reshape>(
+        element::f32, Shape{400, 100}, pattern::has_class<ngraph::op::Parameter>());
+    auto lstm_weights_iter = std::make_shared<ngraph::op::Reshape>(
         lstm_weights_iter_shared, AxisVector{1, 0}, Shape{100, 400});
     auto lstm_weights_iter_label = std::make_shared<pattern::op::Label>(
         lstm_weights_iter, nullptr, NodeVector{lstm_weights_iter});
     auto lstm_bias_layer_shared = std::make_shared<pattern::op::Label>(element::f32, Shape{400});
     auto lstm_bias_iter_shared = std::make_shared<pattern::op::Label>(element::f32, Shape{400});
-    auto lstm_bias = std::make_shared<op::Add>(lstm_bias_layer_shared, lstm_bias_iter_shared);
+    auto lstm_bias =
+        std::make_shared<ngraph::op::Add>(lstm_bias_layer_shared, lstm_bias_iter_shared);
     auto lstm_bias_label =
         std::make_shared<pattern::op::Label>(lstm_bias, nullptr, NodeVector{lstm_bias});
     ngraph::runtime::cpu::rnn_utils::rnntype ref_rnn_type =
         ngraph::runtime::cpu::rnn_utils::rnntype::vanilla_lstm;
-    auto lstm = std::make_shared<op::Lstm>(lstm_src_layer,
+    auto lstm = std::make_shared<ngraph::op::Lstm>(lstm_src_layer,
                                            lstm_src_iter_label,
                                            lstm_weights_layer_label,
                                            lstm_weights_iter_label,
                                            lstm_bias_label,
                                            ref_rnn_type);
-    auto lstm_goe = std::make_shared<op::GetOutputElement>(lstm, 1);
+    auto lstm_goe = std::make_shared<ngraph::op::GetOutputElement>(lstm, 1);
     // We cannot attach labels to multi-output nodes, so we attach a label to the goe instead
     auto lstm_goe_label =
         std::make_shared<pattern::op::Label>(lstm_goe, nullptr, NodeVector{lstm_goe});
     auto lstm_goe_slice =
-        std::make_shared<op::Slice>(lstm_goe_label, Coordinate{10, 0}, Coordinate{20, 100});
+        std::make_shared<ngraph::op::Slice>(lstm_goe_label, Coordinate{10, 0}, Coordinate{20, 100});
     pattern::recurrent_graph_rewrite_callback callback = [lstm_goe_label,
                                                           lstm_src_layer,
...
...
@@ -387,7 +396,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
         {
             auto node_labels = m.get_bound_nodes_for_pattern(input_label);
             std::reverse(node_labels.begin(), node_labels.end());
-            return std::make_shared<op::Concat>(node_labels, 0);
+            return std::make_shared<ngraph::op::Concat>(node_labels, 0);
         }
     };
...
...
@@ -429,9 +438,9 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
     }
     auto check_const_input = [&](std::shared_ptr<Node> n) {
-        if (std::dynamic_pointer_cast<op::Constant>(n) ||
-            (std::dynamic_pointer_cast<op::Broadcast>(n) &&
-             std::dynamic_pointer_cast<op::Constant>(n->get_argument(0))))
+        if (std::dynamic_pointer_cast<ngraph::op::Constant>(n) ||
+            (std::dynamic_pointer_cast<ngraph::op::Broadcast>(n) &&
+             std::dynamic_pointer_cast<ngraph::op::Constant>(n->get_argument(0))))
         {
             return true;
         }
...
...
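For context, check_const_input is a type predicate built from std::dynamic_pointer_cast: it accepts a node that is a Constant, or a Broadcast fed by a Constant. A self-contained sketch of the same shape, using simplified stand-in node classes rather than ngraph's real ones:

    #include <cstddef>
    #include <iostream>
    #include <memory>
    #include <vector>

    // Simplified stand-ins for ngraph's node classes (hypothetical, for illustration).
    struct Node
    {
        virtual ~Node() = default;
        std::vector<std::shared_ptr<Node>> args;
        std::shared_ptr<Node> get_argument(std::size_t i) const { return args.at(i); }
    };
    struct Constant : Node
    {
    };
    struct Broadcast : Node
    {
    };

    // Same structure as the lambda above: a node counts as constant if it is a
    // Constant, or a Broadcast whose first argument is a Constant.
    bool check_const_input(const std::shared_ptr<Node>& n)
    {
        return std::dynamic_pointer_cast<Constant>(n) ||
               (std::dynamic_pointer_cast<Broadcast>(n) &&
                std::dynamic_pointer_cast<Constant>(n->get_argument(0)));
    }

    int main()
    {
        auto c = std::make_shared<Constant>();
        auto b = std::make_shared<Broadcast>();
        b->args.push_back(c);
        std::cout << check_const_input(c) << check_const_input(b) << "\n"; // prints 11
    }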
@@ -460,7 +469,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
             return false;
         }
-    auto rnn = std::make_shared<op::Rnn>(rnn_src_layer,
+    auto rnn = std::make_shared<ngraph::op::Rnn>(rnn_src_layer,
                                          rnn_src_iter,
                                          rnn_weights_layer,
                                          rnn_weights_iter,
...
...
@@ -473,13 +482,14 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
                                          num_fused_rnn_layers,
                                          rnn_type);
-    std::vector<std::shared_ptr<op::Slice>> ht_slice_per_timestep(sequence_len, nullptr);
-    auto rnn_ht_goe = std::make_shared<op::GetOutputElement>(rnn, 0);
-    auto rnn_ht_ct_goe = std::make_shared<op::GetOutputElement>(rnn, 1);
+    std::vector<std::shared_ptr<ngraph::op::Slice>> ht_slice_per_timestep(sequence_len, nullptr);
+    auto rnn_ht_goe = std::make_shared<ngraph::op::GetOutputElement>(rnn, 0);
+    auto rnn_ht_ct_goe = std::make_shared<ngraph::op::GetOutputElement>(rnn, 1);
     for (size_t i = 0, start_index = 0; i < sequence_len; i++, start_index += batch_size)
     {
-        ht_slice_per_timestep[i] = (std::make_shared<op::Slice>(
+        ht_slice_per_timestep[i] = (std::make_shared<ngraph::op::Slice>(
             rnn_ht_goe,
             Coordinate{start_index, 0},
             Coordinate{start_index + batch_size, src_iter_feature_size}));
...
...
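The loop in this hunk carves the fused RNN's hidden-state output into one Slice per timestep: timestep i owns the batch_size rows starting at i * batch_size, across all src_iter_feature_size columns. A small sketch of just that index arithmetic; the sizes are made up, the pass reads the real ones off the matched graph:

    #include <cstddef>
    #include <iostream>

    int main()
    {
        // Illustrative values only.
        const std::size_t sequence_len = 3, batch_size = 2, src_iter_feature_size = 4;

        for (std::size_t i = 0, start_index = 0; i < sequence_len; i++, start_index += batch_size)
        {
            // These bounds mirror the Coordinate pairs passed to ngraph::op::Slice:
            // lower {start_index, 0}, upper {start_index + batch_size, src_iter_feature_size}.
            std::cout << "timestep " << i << ": rows [" << start_index << ", "
                      << start_index + batch_size << "), cols [0, " << src_iter_feature_size
                      << ")\n";
        }
    }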
@@ -503,7 +513,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
     for (size_t index = 0; index < sequence_len; index++)
     {
-        auto goe_nodes = op::get_output_elements(lstm_nodes[index]);
+        auto goe_nodes = ngraph::op::get_output_elements(lstm_nodes[index]);
         // if there is no GOE followed by the Lstm, their might be pattern match error
         // we will return safely
...
...
@@ -521,7 +531,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
         {
             if (ngraph::is_used(goe0_user.get()))
             {
-                if (!std::dynamic_pointer_cast<op::Slice>(goe0_user))
+                if (!std::dynamic_pointer_cast<ngraph::op::Slice>(goe0_user))
                 {
                     NGRAPH_DEBUG << "Did not find LSTM slice to replace with RNN slice";
                     return false;
...
...
@@ -536,7 +546,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
             }
         }
-    auto rnn_ct_goe = op::get_output_elements(lstm_nodes[sequence_len - 1])[1];
+    auto rnn_ct_goe = ngraph::op::get_output_elements(lstm_nodes[sequence_len - 1])[1];
     if (rnn_ct_goe)
     {
         replace_collapse_node_user(rnn_ct_goe, rnn_ht_ct_goe->get_outputs().at(0));
...
...
@@ -566,7 +576,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
 static std::shared_ptr<Node> stack_rnn_inputs(NodeVector rnn_input_nodes)
 {
     std::reverse(rnn_input_nodes.begin(), rnn_input_nodes.end());
-    return std::make_shared<op::Concat>(rnn_input_nodes, 0);
+    return std::make_shared<ngraph::op::Concat>(rnn_input_nodes, 0);
 }
 void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_fusion_fprop()
...
...
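stack_rnn_inputs is two steps: reverse the matched per-timestep nodes, then concatenate them along axis 0. The reversal suggests the recurrent matcher binds nodes starting from the last timestep, so flipping the vector restores chronological order before the Concat. A plain-vector sketch of the same two steps, with hypothetical labels standing in for nodes:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        // Stand-in for NodeVector: per-timestep inputs as the matcher hands them
        // over, last timestep first (hypothetical labels).
        std::vector<std::string> rnn_input_nodes = {"x_t2", "x_t1", "x_t0"};

        // Same two steps as stack_rnn_inputs: reverse, then concatenate on axis 0.
        std::reverse(rnn_input_nodes.begin(), rnn_input_nodes.end());
        for (const auto& n : rnn_input_nodes)
        {
            std::cout << n << " "; // x_t0 x_t1 x_t2
        }
        std::cout << "\n";
    }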
@@ -585,7 +595,7 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
     ngraph::runtime::cpu::rnn_utils::rnntype ref_rnn_type =
         ngraph::runtime::cpu::rnn_utils::rnntype::vanilla_lstm;
-    auto ref_rnn_node = std::make_shared<op::Rnn>(rnn_src_layer,
+    auto ref_rnn_node = std::make_shared<ngraph::op::Rnn>(rnn_src_layer,
                                                   rnn_src_iter,
                                                   rnn_weights_layer,
                                                   rnn_weights_iter,
...
...
@@ -598,7 +608,7 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
                                                   ref_num_of_rnn_fused_layer,
                                                   ref_rnn_type);
-    auto rnn_goe0 = std::make_shared<op::GetOutputElement>(ref_rnn_node, 0);
+    auto rnn_goe0 = std::make_shared<ngraph::op::GetOutputElement>(ref_rnn_node, 0);
     auto rnn_goe0_label =
         std::make_shared<pattern::op::Label>(rnn_goe0, nullptr, NodeVector{rnn_goe0});
...
...
@@ -622,10 +632,11 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
         auto rnn_goe0_bounded_nodes = m.get_bound_nodes_for_pattern(rnn_goe0_label);
-        std::vector<std::shared_ptr<op::Rnn>> rnn_nodes;
+        std::vector<std::shared_ptr<ngraph::op::Rnn>> rnn_nodes;
         for (auto rnn_goe : m.get_bound_nodes_for_pattern(rnn_goe0_label))
         {
-            if (auto rnn_op = std::dynamic_pointer_cast<op::Rnn>(rnn_goe->get_arguments()[0]))
+            if (auto rnn_op =
+                    std::dynamic_pointer_cast<ngraph::op::Rnn>(rnn_goe->get_arguments()[0]))
             {
                 rnn_nodes.push_back(rnn_op);
             }
...
...
@@ -695,7 +706,7 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
                 "layer");
         }
-        auto rnn = std::make_shared<op::Rnn>(mrnn_src_layer,
+        auto rnn = std::make_shared<ngraph::op::Rnn>(mrnn_src_layer,
                                              mrnn_src_iter,
                                              mrnn_weights_layer,
                                              mrnn_weights_iter,
...
...
@@ -708,8 +719,8 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
                                              num_fused_rnn_layers,
                                              rnn_type);
-        auto mrnn_ht = std::make_shared<op::GetOutputElement>(rnn, 0);
-        auto mrnn_ht_ct = std::make_shared<op::GetOutputElement>(rnn, 1);
+        auto mrnn_ht = std::make_shared<ngraph::op::GetOutputElement>(rnn, 0);
+        auto mrnn_ht_ct = std::make_shared<ngraph::op::GetOutputElement>(rnn, 1);
         // Replace all the users of RNN cell state {ct} across different user.
         auto replace_rnn_output_cellstate = [&](std::shared_ptr<Node> rnn_ct_goe1, size_t layer) {
...
...
@@ -718,7 +729,7 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
             // of all the layers, {{ht_1 | ct_1} || {ht2 |ct2} || ....{htn | ctn}}
             // we will slice the cell state output tensor {ct_*} from the fused RNN kerenel output and feeds
             // {ct_*} consumer if any
-            auto ct_slice = std::make_shared<op::Slice>(
+            auto ct_slice = std::make_shared<ngraph::op::Slice>(
                 mrnn_ht_ct,
                 Coordinate{((layer - 1) * batch_size * num_rnn_cell_states) + batch_size, 0},
                 Coordinate{layer * batch_size * num_rnn_cell_states, src_iter_feature_size});
...
...
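In the fused multi-layer output, each layer contributes a block of batch_size * num_rnn_cell_states rows laid out {ht | ct}, so the Coordinate arithmetic above skips batch_size rows into a layer's block to land on its ct half. A sketch of those bounds with made-up sizes (the pass reads the real ones off the matched RNN node):

    #include <cstddef>
    #include <iostream>

    int main()
    {
        // Illustrative values only.
        const std::size_t batch_size = 2, num_rnn_cell_states = 2, src_iter_feature_size = 4;

        for (std::size_t layer = 1; layer <= 3; ++layer)
        {
            // Same arithmetic as the Coordinate arguments to ngraph::op::Slice above.
            std::size_t lower = (layer - 1) * batch_size * num_rnn_cell_states + batch_size;
            std::size_t upper = layer * batch_size * num_rnn_cell_states;
            std::cout << "layer " << layer << ": ct rows [" << lower << ", " << upper
                      << "), cols [0, " << src_iter_feature_size << ")\n";
        }
    }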
@@ -732,7 +743,7 @@ void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_
         // i.e {RNN7, RNN6, RNN5.... RNN0}
         for (size_t index = 0; index < rnn_nodes.size(); index++)
         {
-            auto goe_nodes = op::get_output_elements(rnn_nodes[index]);
+            auto goe_nodes = ngraph::op::get_output_elements(rnn_nodes[index]);
             // if there is no GOE followed by the Lstm, their might be pattern match error
             // we will return safely
             if (goe_nodes.size() != 2)
...
...
@@ -771,15 +782,17 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
 void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
 {
     auto rnn_left_to_right = std::make_shared<pattern::op::Label>(
-        element::f32, Shape{1, 256}, pattern::has_class<op::Rnn>());
+        element::f32, Shape{1, 256}, pattern::has_class<ngraph::op::Rnn>());
     auto rnn_right_to_left = std::make_shared<pattern::op::Label>(
-        element::f32, Shape{1, 256}, pattern::has_class<op::Rnn>());
+        element::f32, Shape{1, 256}, pattern::has_class<ngraph::op::Rnn>());
     auto reshape_pred = [](std::shared_ptr<Node> n) {
-        return (std::dynamic_pointer_cast<op::Reshape>(n) != nullptr);
+        return (std::dynamic_pointer_cast<ngraph::op::Reshape>(n) != nullptr);
     };
-    auto rnn_left_to_right_goe0 = std::make_shared<op::GetOutputElement>(rnn_left_to_right, 0);
-    auto rnn_right_to_left_goe0 = std::make_shared<op::GetOutputElement>(rnn_right_to_left, 0);
+    auto rnn_left_to_right_goe0 =
+        std::make_shared<ngraph::op::GetOutputElement>(rnn_left_to_right, 0);
+    auto rnn_right_to_left_goe0 =
+        std::make_shared<ngraph::op::GetOutputElement>(rnn_right_to_left, 0);
     auto rnn_rtol_goe0_reshape_ntc =
         std::make_shared<pattern::op::Skip>(rnn_right_to_left_goe0, reshape_pred);
...
...
@@ -791,21 +804,23 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
         std::make_shared<pattern::op::Skip>(rnn_ltor_goe0_reshape_ntc, reshape_pred);
     auto reverse_seq_predicate = [](std::shared_ptr<Node> node) {
-        return pattern::has_class<op::ReverseSequence>()(node) ||
-               pattern::has_class<op::Reverse>()(node);
+        return pattern::has_class<ngraph::op::ReverseSequence>()(node) ||
+               pattern::has_class<ngraph::op::Reverse>()(node);
     };
     auto skip_reverse_seq =
         std::make_shared<pattern::op::Skip>(rnn_rtol_goe0_reshape_tnc, reverse_seq_predicate);
-    auto concat = std::make_shared<op::Concat>(
-        NodeVector{rnn_ltor_goe0_reshape_tnc, skip_reverse_seq}, 0);
+    auto concat = std::make_shared<ngraph::op::Concat>(
+        NodeVector{rnn_ltor_goe0_reshape_tnc, skip_reverse_seq}, 0);
     // Define a call back that needs to called once the DFG matches the pattern
     ngraph::pattern::graph_rewrite_callback callback = [rnn_left_to_right,
                                                         rnn_right_to_left](pattern::Matcher& m) {
         auto pattern_map = m.get_pattern_map();
-        auto rnn_ltor_node = std::static_pointer_cast<op::Rnn>(pattern_map[rnn_left_to_right]);
-        auto rnn_rtol_node = std::static_pointer_cast<op::Rnn>(pattern_map[rnn_right_to_left]);
+        auto rnn_ltor_node =
+            std::static_pointer_cast<ngraph::op::Rnn>(pattern_map[rnn_left_to_right]);
+        auto rnn_rtol_node =
+            std::static_pointer_cast<ngraph::op::Rnn>(pattern_map[rnn_right_to_left]);
         if (rnn_ltor_node->get_src_sequence_length() != rnn_rtol_node->get_src_sequence_length())
         {
...
...
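Both reshape_pred and reverse_seq_predicate are type predicates over nodes, and pattern::has_class<T>() packages the same dynamic_pointer_cast test as a reusable callable. A simplified stand-in (not ngraph's actual implementation) showing how such a predicate factory can be written and composed with ||:

    #include <iostream>
    #include <memory>

    // Simplified stand-in node hierarchy (hypothetical).
    struct Node
    {
        virtual ~Node() = default;
    };
    struct Reshape : Node
    {
    };
    struct Reverse : Node
    {
    };

    // A minimal analogue of pattern::has_class<T>(): a factory returning a
    // callable that tests a node's dynamic type.
    template <typename T>
    auto has_class()
    {
        return [](const std::shared_ptr<Node>& n) {
            return std::dynamic_pointer_cast<T>(n) != nullptr;
        };
    }

    int main()
    {
        std::shared_ptr<Node> r = std::make_shared<Reshape>();
        // Predicates compose with ||, as in reverse_seq_predicate above.
        bool match = has_class<Reshape>()(r) || has_class<Reverse>()(r);
        std::cout << match << "\n"; // 1
    }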
@@ -852,7 +867,7 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
             auto nodes =
                 NodeVector{rnn_ltor_node->get_argument(index), rnn_rtol_node->get_argument(index)};
-            return std::make_shared<op::Concat>(nodes, 0);
+            return std::make_shared<ngraph::op::Concat>(nodes, 0);
         };
         auto src_layer = rnn_ltor_node->get_arguments()[0];
...
...
@@ -861,7 +876,7 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
         auto weights_iter = construct_birnn_inputs(3);
         auto bias = construct_birnn_inputs(4);
-        auto rnn = std::make_shared<op::Rnn>(src_layer,
+        auto rnn = std::make_shared<ngraph::op::Rnn>(src_layer,
                                              src_iter,
                                              weights_layer,
                                              weights_iter,
...
...
@@ -874,7 +889,7 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
                                              num_fused_rnn_layers,
                                              rnn_type);
-        auto layer_rnn_ht = std::make_shared<op::GetOutputElement>(rnn, 0);
+        auto layer_rnn_ht = std::make_shared<ngraph::op::GetOutputElement>(rnn, 0);
         size_t batch_size = layer_rnn_ht->get_shape()[0] / num_time_steps;
         size_t feature_size = layer_rnn_ht->get_shape()[1];
...
...
@@ -882,15 +897,15 @@ void ngraph::runtime::cpu::pass::BiDirectionalRnn::construct_bidirectional_rnn()
         std::shared_ptr<Node> layer_rnn_ht_reshape = layer_rnn_ht;
         if (m.get_match_root()->get_shape() != layer_rnn_ht->get_shape())
         {
-            layer_rnn_ht_reshape = std::make_shared<op::Reshape>(
+            layer_rnn_ht_reshape = std::make_shared<ngraph::op::Reshape>(
                 layer_rnn_ht, AxisVector{0, 1}, Shape{num_time_steps, batch_size, feature_size});
         }
         // we will check if the node being replaced is in Shape{n, t, c}, if so we will transpose
         if (m.get_match_root()->get_shape() == Shape{batch_size, num_time_steps, feature_size})
         {
-            layer_rnn_ht_reshape = std::make_shared<op::Reshape>(
-                layer_rnn_ht_reshape,
+            layer_rnn_ht_reshape = std::make_shared<ngraph::op::Reshape>(
+                layer_rnn_ht_reshape,
                 AxisVector{1, 0, 2}, Shape{batch_size, num_time_steps, feature_size});
         }
...
...