Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
0d125c51
Commit
0d125c51
authored
May 21, 2018
by
Jayaram Bobba
Committed by
Scott Cyphers
May 21, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Batch norm folding (#992)
* Batch norm folding * Addressed PR feedback * Style fixes * Style fix
parent
39d0453f
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
220 additions
and
0 deletions
+220
-0
core_fusion.cpp
src/ngraph/pass/core_fusion.cpp
+89
-0
core_fusion.hpp
src/ngraph/pass/core_fusion.hpp
+2
-0
cpu_fusion.cpp
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp
+68
-0
cpu_fusion.hpp
src/ngraph/runtime/cpu/pass/cpu_fusion.hpp
+2
-0
cpu_fusion.cpp
test/cpu_fusion.cpp
+59
-0
No files found.
src/ngraph/pass/core_fusion.cpp
View file @
0d125c51
...
...
@@ -21,11 +21,19 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/sqrt.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp"
...
...
@@ -73,3 +81,84 @@ void pass::CoreFusion::construct_relu()
auto
m
=
make_shared
<
pattern
::
Matcher
>
(
max
,
callback
);
this
->
add_matcher
(
m
);
}
// Folds a BatchNorm (inference, with supplied mean/variance) that directly
// consumes a Convolution into a single Convolution with rescaled weights plus
// a broadcast bias add:
//   new_weights = old_weights * gamma / sqrt(variance + eps)
//   new_bias    = beta - mean * gamma / sqrt(variance + eps)
// Registers the pattern + rewrite callback on this GraphRewrite pass.
void pass::CoreFusion::construct_folded_batch_norm()
{
    // Pattern shapes are placeholders; Labels match nodes of any compatible shape.
    Shape shape{2, 2, 1, 1};
    auto input = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);

    auto pconv = std::make_shared<op::Convolution>(input,
                                                   filters,
                                                   Strides{1, 1},
                                                   Strides{1, 1},
                                                   CoordinateDiff{0, 0},
                                                   CoordinateDiff{0, 0},
                                                   Strides{1, 1});
    auto mean_shape = Shape{2};
    auto mean = std::make_shared<pattern::op::Label>(element::f32, mean_shape);
    auto var_shape = Shape{2};
    auto var = std::make_shared<pattern::op::Label>(element::f32, var_shape);
    auto gamma_shape = Shape{2};
    auto gamma = std::make_shared<pattern::op::Label>(element::f32, gamma_shape);
    auto beta_shape = Shape{2};
    auto beta = std::make_shared<pattern::op::Label>(element::f32, beta_shape);
    double eps = 0.001;
    auto bn = std::make_shared<op::BatchNorm>(eps, gamma, beta, pconv, mean, var);

    ngraph::pattern::graph_rewrite_callback callback =
        [input, filters, mean, var, gamma, beta](pattern::Matcher& m) {
            NGRAPH_DEBUG << "In callback for folded batch norm against node = "
                         << m.get_match_root()->get_name();
            auto pattern_map = m.get_pattern_map();

            auto m_bn = std::dynamic_pointer_cast<op::BatchNorm>(m.get_match_root());
            // Guard the downcasts: a failed cast would otherwise be dereferenced below.
            if (!m_bn)
            {
                return false;
            }
            auto m_conv = std::dynamic_pointer_cast<op::Convolution>(m_bn->get_argument(2));
            if (!m_conv)
            {
                return false;
            }

            // Only fold when the convolution feeds the batch norm exclusively;
            // otherwise rewriting would change other users' inputs.
            if (m_conv->get_users().size() > 1)
            {
                return false;
            }

            // The bias broadcast below assumes NCHW (rank-4) output.
            if (m_conv->get_shape().size() != 4)
            {
                return false;
            }

            // new weights = old weights * gamma / sqrt(variance + epsilon)
            // new biases = -mean * gamma / sqrt(variance + epsilon) + beta

            auto bn_eps = op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
            auto var_eps = std::make_shared<op::Add>(
                pattern_map[var],
                std::make_shared<op::Broadcast>(bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
            auto sqrt_var_eps = std::make_shared<op::Sqrt>(var_eps);

            auto mean_gamma = std::make_shared<op::Multiply>(pattern_map[mean], pattern_map[gamma]);
            auto new_biases = std::make_shared<op::Subtract>(
                pattern_map[beta], std::make_shared<op::Divide>(mean_gamma, sqrt_var_eps));
            auto weight_scaling = std::make_shared<op::Divide>(pattern_map[gamma], sqrt_var_eps);
            // Scale is per output channel (filter dim 0), so broadcast along {1,2,3}.
            auto new_weights = std::make_shared<op::Multiply>(
                pattern_map[filters],
                std::make_shared<op::Broadcast>(
                    weight_scaling, pattern_map[filters]->get_shape(), AxisSet{1, 2, 3}));

            // Rebuild the convolution with the matched convolution's attributes
            // so strides/padding/dilation are preserved exactly.
            auto conv = std::make_shared<op::Convolution>(pattern_map[input],
                                                          new_weights,
                                                          m_conv->get_window_movement_strides(),
                                                          m_conv->get_window_dilation_strides(),
                                                          m_conv->get_padding_below(),
                                                          m_conv->get_padding_above(),
                                                          m_conv->get_data_dilation_strides());
            // Bias is per channel (dim 1), so broadcast along {0,2,3}.
            auto conv_bias = conv + std::make_shared<op::Broadcast>(
                                        new_biases, conv->get_shape(), AxisSet{0, 2, 3});
            ngraph::replace_node(m.get_match_root(), conv_bias);
            return true;
        };

    auto m = std::make_shared<ngraph::pattern::Matcher>(bn, callback);
    this->add_matcher(m);
}
src/ngraph/pass/core_fusion.hpp
View file @
0d125c51
...
...
@@ -33,6 +33,8 @@ public:
:
GraphRewrite
()
{
construct_relu
();
construct_folded_batch_norm
();
}
void
construct_relu
();
void
construct_folded_batch_norm
();
};
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp
View file @
0d125c51
...
...
@@ -943,3 +943,71 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_relu()
auto
m
=
std
::
make_shared
<
pattern
::
Matcher
>
(
prelu
,
callback
);
this
->
add_matcher
(
m
);
}
// Fuses Relu(ConvolutionBias(...)) into a single ConvolutionBiasRelu node,
// provided the convolution can be handled by an MKLDNN kernel (f32, rank-4
// arguments, no data dilation) and feeds only the Relu.
void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu()
{
    // Pattern shapes are placeholders; Labels match nodes of any compatible shape.
    Shape shape{2, 2, 1, 1};
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{1});

    auto conv_bias = std::make_shared<op::ConvolutionBias>(data_batch,
                                                           filters,
                                                           bias,
                                                           Strides{1, 1},
                                                           Strides{1, 1},
                                                           CoordinateDiff{0, 0},
                                                           CoordinateDiff{0, 0},
                                                           Strides{1, 1});

    auto prelu = std::make_shared<op::Relu>(conv_bias);

    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        // Fixed: message previously said "construct_conv_relu", which made the
        // log misattribute this callback to the wrong fusion pass.
        NGRAPH_DEBUG << "In a callback for construct_conv_bias_relu against "
                     << m.get_match_root()->get_name();

        auto conv =
            std::dynamic_pointer_cast<op::ConvolutionBias>(m.get_match_root()->get_argument(0));
        // Guard the downcast: a failed cast would otherwise be dereferenced below.
        if (!conv)
        {
            return false;
        }

        //These checks are to make sure a MKLDNN Convolution kernel can be used.
        bool data_dilated = false;
        for (size_t s : conv->get_data_dilation_strides())
        {
            data_dilated = data_dilated || (s != 1);
        }
        if (data_dilated)
        {
            NGRAPH_DEBUG << "Convolution has dilations greater than 1";
            return false;
        }

        if (conv->get_element_type() != element::f32)
        {
            NGRAPH_DEBUG << "Convolution isn't of type float";
            return false;
        }

        auto arg0_rank = conv->get_input_shape(0).size();
        auto arg1_rank = conv->get_input_shape(1).size();
        if (arg0_rank != 4 || arg1_rank != 4)
        {
            NGRAPH_DEBUG << "Convolution's arguments ranks aren't equal to 4";
            return false;
        }

        // Fusing would hide the convolution's output from any other consumer.
        if (conv->get_users().size() > 1)
        {
            NGRAPH_DEBUG << "Convolution has more than one user";
            return false;
        }

        // make_shared instead of shared_ptr<Node>(new ...): exception-safe,
        // single allocation.
        auto conv_relu = std::make_shared<op::ConvolutionBiasRelu>(conv);
        ngraph::replace_node(m.get_match_root(), conv_relu);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(prelu, callback);
    this->add_matcher(m);
}
src/ngraph/runtime/cpu/pass/cpu_fusion.hpp
View file @
0d125c51
...
...
@@ -63,6 +63,7 @@ public:
construct_batch_norm_relu
();
construct_batch_norm_relu_global_stats
();
construct_conv_relu
();
construct_conv_bias_relu
();
}
if
(
fusions
&
DIFFERENTIABLE_FUSIONS
)
...
...
@@ -84,4 +85,5 @@ private:
void
construct_batch_norm_relu
();
void
construct_batch_norm_relu_global_stats
();
void
construct_conv_relu
();
void
construct_conv_bias_relu
();
};
test/cpu_fusion.cpp
View file @
0d125c51
...
...
@@ -1234,3 +1234,62 @@ TEST(cpu_fusion, backwards_maxpool_with_indices_n4_c1_hw4_2x2_max)
backend
->
call
(
df
,
{
output
},
{
input
,
ep
});
ASSERT_TRUE
(
read_vector
<
float
>
(
output
)
==
expected
);
}
// Verifies the batch-norm-folding fusion end to end: the same graph
// (BatchNorm fed by a Convolution) is run on the INTERPRETER backend (no
// fusion) and the CPU backend (fusion applied), and the outputs must match.
TEST(cpu_fusion, batch_norm_folding)
{
    Shape shape_input{1, 8, 3, 3};   // NCHW data batch
    Shape shape_weights{2, 8, 1, 1}; // 2 output channels, 1x1 kernels
    Shape shape_norm{2};             // per-output-channel BN statistics

    // Build a fresh, identical function for each backend so each compiles
    // independently (the CPU pass mutates its graph during fusion).
    auto make_function = [shape_input, shape_weights, shape_norm]() {
        auto input = std::make_shared<op::Parameter>(element::f32, shape_input);
        auto weights = std::make_shared<op::Parameter>(element::f32, shape_weights);
        double eps = 0.001;
        auto gamma = std::make_shared<op::Parameter>(element::f32, shape_norm);
        auto beta = std::make_shared<op::Parameter>(element::f32, shape_norm);
        auto mean = std::make_shared<op::Parameter>(element::f32, shape_norm);
        auto var = std::make_shared<op::Parameter>(element::f32, shape_norm);
        auto conv = std::make_shared<op::Convolution>(input, weights, Strides{1, 1}, Strides{1, 1});
        // Inference-mode BatchNorm: mean/variance supplied as inputs.
        auto bn = std::make_shared<op::BatchNorm>(eps, gamma, beta, conv, mean, var);
        auto f = make_shared<Function>(NodeVector{bn},
                                       op::ParameterVector{input, weights, gamma, beta, mean, var});
        return f;
    };

    auto int_f = make_function();
    auto cpu_f = make_function();

    // Argument order matches the ParameterVector above:
    // input (72 values), weights (16), gamma, beta, mean, var (2 each).
    vector<vector<float>> args{
        {1.25f,  2.25f,  5.25f,  6.25f,  -1.25f, -1.25f, 3.25f,  -4.25f, 7.25f,
         8.25f,  -1.25f, -1.25f, 1.25f,  2.25f,  -3.25f, 2.25f,  4.25f,  4.25f,
         1.25f,  2.25f,  -4.25f, 2.25f,  4.25f,  4.25f,  0.f,    0.f,    -1.f,
         0.f,    2.f,    2.f,    0.f,    0.f,    0.f,    0.f,    2.f,    2.f,
         1.25f,  2.25f,  5.25f,  6.25f,  1.25f,  1.25f,  3.25f,  4.25f,  -7.25f,
         8.25f,  1.25f,  -1.25f, -1.25f, 2.25f,  3.25f,  2.25f,  -4.25f, -4.25f,
         -1.25f, -2.25f, 4.25f,  2.25f,  4.25f,  4.25f,  0.f,    0.f,    1.f,
         0.f,    -2.f,   2.f,    0.f,    0.f,    0.f,    0.f,    -2.f,   -2.f},
        {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f,
         -1.25f, 0.f, 0.f, 0.f, 0.f, -2.f},
        {-0.9384f, 0.01875f}, // gamma
        {11.0f, 1.3f},        // beta
        {0.12f, 0.31f},       // mean
        {0.01f, 0.11f},       // variance
    };
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    // Folded (CPU) and unfolded (INTERPRETER) results must agree within
    // floating-point tolerance.
    EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment