Commit 2d2b3b2f authored by Amy Zhuang's avatar Amy Zhuang Committed by Robert Kimball

Add CPU horizontal fusion pass for inception. (#1577)

* Add CPU horizontal fusion pass for inception.

* Name change.

* Move horizontal fusion to cpu_fusion.

* Change horizontal fusion pass for inception to a general horizontal fusion pass.
Add a unit test conv_horizontal_fusion to cpu_fusion.

* Rename files.

* Correct cpu_fusion.hpp.

* Add NGRAPH_DEBUG.

* Set native layout when input format of slice is nChw16c or nChw8c and lower bound of
channels is not a multiple of 16 or 8.
parent 7da3ec33
......@@ -102,6 +102,7 @@ set(SRC
pass/cpu_collapse_dims.cpp
pass/cpu_concat_inputs.cpp
pass/cpu_fusion.cpp
pass/cpu_horizontal_fusion.cpp
pass/cpu_layout.cpp
pass/cpu_loop_kernel_fusion.cpp
pass/cpu_mat_fusion.cpp
......
......@@ -160,6 +160,7 @@
#include "ngraph/runtime/cpu/pass/cpu_collapse_dims.hpp"
#include "ngraph/runtime/cpu/pass/cpu_concat_inputs.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_horizontal_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
#include "ngraph/runtime/cpu/pass/cpu_mat_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp"
......@@ -387,6 +388,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUHorizontalFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUCollapseDims>();
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
......@@ -1138,7 +1140,6 @@ void runtime::cpu::CPU_ExternalFunction::build()
m_mkldnn_emitter.reset(new MKLDNNEmitter());
ngraph::pass::Manager pass_manager;
register_common_passes(pass_manager);
pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(size_t(s_memory_pool_alignment), true);
pass_manager.run_passes(m_function, false);
......
......@@ -26,12 +26,14 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
......@@ -41,6 +43,7 @@
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/sigmoid.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/sqrt.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/sum.hpp"
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/pass/cpu_horizontal_fusion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
using namespace ngraph;
using namespace std;
void ngraph::runtime::cpu::pass::CPUHorizontalFusion::cpu_conv_horizontal_fusion()
{
    // Horizontal fusion: several ConvolutionBias nodes that consume the SAME
    // data input are merged into a single ConvolutionBias whose filters and
    // biases are the concatenation (along the output-channel axis) of the
    // originals. Each original conv is then replaced by a Slice of the fused
    // output. Only profitable when the data node has more than one consumer.
    auto has_multiple_users = [](std::shared_ptr<Node> n) {
        auto inputs = n->get_output_inputs(0);
        return inputs.size() > 1;
    };

    // Label shapes are placeholders (typical inception sizes); the matcher
    // only constrains the predicate, not the concrete shape.
    auto data_conv = std::make_shared<pattern::op::Label>(
        element::f32, Shape{1, 256, 35, 35}, has_multiple_users);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, Shape{64, 256, 1, 1});
    auto bias = std::make_shared<pattern::op::Label>(element::f32, Shape{64});

    auto conv_bias = std::make_shared<ngraph::op::ConvolutionBias>(data_conv,
                                                                   filters,
                                                                   bias,
                                                                   Strides{1, 1},
                                                                   Strides{1, 1},
                                                                   CoordinateDiff{0, 0},
                                                                   CoordinateDiff{0, 0},
                                                                   Strides{1, 1},
                                                                   true);

    pattern::graph_rewrite_callback callback = [data_conv](pattern::Matcher& m) {
        NGRAPH_DEBUG << "conv_horizontal_fusion: In a callback for conv horizontal fusion for "
                     << m.get_match_root()->get_name();
        auto conv_bias_root = std::dynamic_pointer_cast<op::ConvolutionBias>(m.get_match_root());
        if (!conv_bias_root)
        {
            return false;
        }
        //check if the node has been replaced
        if (conv_bias_root->get_users().empty())
        {
            return false;
        }

        auto m_filters_shape = conv_bias_root->get_input_shape(1);
        auto f_h = m_filters_shape[2];
        auto f_w = m_filters_shape[3];

        // get weights and bias from each CBR and create Concat nodes
        std::vector<std::shared_ptr<Node>> weights_nodes;
        std::vector<std::shared_ptr<Node>> bias_nodes;
        std::vector<std::shared_ptr<Node>> conv_bias_nodes;

        for (auto u : m.get_pattern_map()[data_conv]->get_users())
        {
            if (!pattern::has_class<ngraph::op::ConvolutionBias>()(u))
            {
                continue;
            }
            if (u->get_argument(0) != m.get_pattern_map()[data_conv])
            {
                NGRAPH_DEBUG << "conv_horizontal_fusion: data_conv is not input 0 for "
                             << u->get_name() << "\n";
                continue;
            }
            auto u_filters_shape = u->get_input_shape(1);
            if (u_filters_shape[2] != f_h || u_filters_shape[3] != f_w)
            {
                NGRAPH_DEBUG
                    << "conv_horizontal_fusion: skip conv node with different filter shape\n";
                continue;
            }
            // The fused conv reuses the root's strides/padding/dilation/relu,
            // so every candidate must agree with the root on all of them;
            // otherwise the merged convolution would compute wrong results.
            auto u_cb = std::static_pointer_cast<op::ConvolutionBias>(u);
            if (u_cb->get_window_movement_strides() !=
                    conv_bias_root->get_window_movement_strides() ||
                u_cb->get_window_dilation_strides() !=
                    conv_bias_root->get_window_dilation_strides() ||
                u_cb->get_padding_below() != conv_bias_root->get_padding_below() ||
                u_cb->get_padding_above() != conv_bias_root->get_padding_above() ||
                u_cb->get_data_dilation_strides() !=
                    conv_bias_root->get_data_dilation_strides() ||
                u_cb->with_relu() != conv_bias_root->with_relu())
            {
                NGRAPH_DEBUG
                    << "conv_horizontal_fusion: skip conv node with different filter shape\n";
                continue;
            }
            weights_nodes.push_back(u->get_argument(1));
            bias_nodes.push_back(u->get_argument(2));
            conv_bias_nodes.push_back(u);
        }

        if (conv_bias_nodes.size() <= 1)
        {
            NGRAPH_DEBUG << "conv_horizontal_fusion: need more than one nodes to do fusion\n";
            return false;
        }

        // Concatenate along the output-channel axis (axis 0 of OIHW filters
        // and of the 1-D bias).
        auto concat_weights = std::make_shared<ngraph::op::Concat>(weights_nodes, 0);
        auto concat_bias = std::make_shared<ngraph::op::Concat>(bias_nodes, 0);
        auto conv_bias_new = std::make_shared<ngraph::op::ConvolutionBias>(
            conv_bias_root->get_argument(0),
            concat_weights,
            concat_bias,
            conv_bias_root->get_window_movement_strides(),
            conv_bias_root->get_window_dilation_strides(),
            conv_bias_root->get_padding_below(),
            conv_bias_root->get_padding_above(),
            conv_bias_root->get_data_dilation_strides(),
            conv_bias_root->with_relu());
        NGRAPH_DEBUG << "conv_horizontal_fusion: new cb shape "
                     << conv_bias_new->get_output_shape(0) << "\n";

        // Replace each original conv with a channel Slice of the fused output.
        size_t index = 0;
        for (auto cb : conv_bias_nodes)
        {
            auto slice_shape = cb->get_output_shape(0);
            NGRAPH_DEBUG << "conv_horizontal_fusion: slice shape " << slice_shape << "\n";
            auto lower_bounds = Coordinate{0, index, 0, 0};
            index += slice_shape[1];
            // BUGFIX: the last coordinate must be the output WIDTH
            // (slice_shape[3]); the original used slice_shape[2] twice, which
            // only happened to work for square feature maps.
            auto upper_bounds =
                Coordinate{slice_shape[0], index, slice_shape[2], slice_shape[3]};
            NGRAPH_DEBUG << "conv_horizontal_fusion: lower_bounds " << lower_bounds << "\n";
            NGRAPH_DEBUG << "conv_horizontal_fusion: upper_bounds " << upper_bounds << "\n";
            auto slice =
                std::make_shared<ngraph::op::Slice>(conv_bias_new, lower_bounds, upper_bounds);
            ngraph::replace_node(cb, slice);
        }
        return true;
    };

    auto m = make_shared<pattern::Matcher>(conv_bias, callback);
    this->add_matcher(m);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/graph_rewrite.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
class CPUHorizontalFusion;
}
}
}
}
// Graph-rewrite pass that fuses sibling ConvolutionBias nodes sharing the
// same data input into one wider convolution plus per-consumer Slices
// (horizontal fusion; motivated by inception-style networks where many
// branches convolve the same tensor).
class ngraph::runtime::cpu::pass::CPUHorizontalFusion : public ngraph::pass::GraphRewrite
{
public:
    CPUHorizontalFusion()
        : GraphRewrite()
    {
        // Register the convolution horizontal-fusion matcher on construction.
        cpu_conv_horizontal_fusion();
    }

private:
    // Builds the ConvolutionBias pattern and its rewrite callback, then adds
    // the matcher to this GraphRewrite pass.
    void cpu_conv_horizontal_fusion();
};
......@@ -1585,6 +1585,33 @@ namespace ngraph
auto result_format =
static_cast<mkldnn::memory::format>(input_md.data.format);
auto slice = static_cast<ngraph::op::Slice*>(node.get());
auto lower_bounds = slice->get_lower_bounds();
if (result_format == mkldnn::memory::nChw16c)
{
// check lower bound of channels
if (lower_bounds[1] % 16 != 0)
{
NGRAPH_DEBUG
<< "slice nChw16c: lower bound of channels not multiple of 16, "
"set native layout\n";
set_native_layouts(external_function, node);
return;
}
}
else if (result_format == mkldnn::memory::nChw8c)
{
// check lower bound of channels
if (lower_bounds[1] % 8 != 0)
{
NGRAPH_DEBUG
<< "slice nChw8C: lower bound of channels not multiple of 8,"
"set native layout\n";
set_native_layouts(external_function, node);
return;
}
}
vector<memory::desc> o_mds;
if (result_format == mkldnn::memory::blocked)
{
......
......@@ -890,6 +890,57 @@ TEST(cpu_fusion, conv_bias_relu_n2c1h2w2_2)
EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
}
// Builds two parallel conv+bias+relu branches that both consume parameter A,
// then concatenates them — the shape the CPU horizontal-fusion pass targets.
// Verifies (1) CPU results match the INTERPRETER reference, and (2) the two
// ConvolutionBias nodes were fused into exactly one on the CPU backend.
TEST(cpu_fusion, conv_horizontal_fusion)
{
    Shape shape_a{2, 1, 6, 6};
    Shape shape_weights{1, 1, 2, 2};
    Shape shape_bias{1};
    // Factory so the INTERPRETER and CPU backends each get an identical,
    // independent graph.
    auto make_function = [shape_a, shape_weights, shape_bias]() {
        auto A = std::make_shared<op::Parameter>(element::f32, shape_a);
        // Branch 1: conv -> broadcast-add bias -> relu. The bias add via
        // Broadcast is what CPUFusion folds into ConvolutionBias before the
        // horizontal-fusion pass runs.
        auto weights1 = std::make_shared<op::Parameter>(element::f32, shape_weights);
        auto conv1 = std::make_shared<op::Convolution>(A, weights1, Strides{2, 2}, Strides{1, 1});
        auto bias1 = std::make_shared<op::Parameter>(element::f32, shape_bias);
        auto conv_bias1 =
            conv1 + std::make_shared<op::Broadcast>(bias1, conv1->get_shape(), AxisSet{0, 2, 3});
        auto relu1 = std::make_shared<op::Relu>(conv_bias1);
        // Branch 2: same structure, different weights/bias, same data input A.
        auto weights2 = std::make_shared<op::Parameter>(element::f32, shape_weights);
        auto conv2 = std::make_shared<op::Convolution>(A, weights2, Strides{2, 2}, Strides{1, 1});
        auto bias2 = std::make_shared<op::Parameter>(element::f32, shape_bias);
        auto conv_bias2 =
            conv2 + std::make_shared<op::Broadcast>(bias2, conv2->get_shape(), AxisSet{0, 2, 3});
        auto relu2 = std::make_shared<op::Relu>(conv_bias2);
        // Concatenate the branches along the channel axis.
        auto concat = std::make_shared<op::Concat>(NodeVector{relu1, relu2}, 1);
        auto f = make_shared<Function>(NodeVector{concat},
                                       op::ParameterVector{A, weights1, bias1, weights2, bias2});
        return f;
    };
    auto int_f = make_function();
    auto cpu_f = make_function();
    // args order matches the ParameterVector: A, weights1, bias1, weights2, bias2.
    vector<vector<float>> args{
        {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f,
         -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f,
         4.25f,  4.25f, 0.f,   0.f,    -1.f,  0.f,   2.f,   2.f,   0.f,   0.f,    0.f,
         0.f,    2.f,   2.f,   1.25f,  2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f,  4.25f,
         -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f,
         -2.25f, 4.25f, 2.25f, 4.25f,  4.25f, 0.f,   0.f,   1.f,   0.f,   -2.f,   2.f,
         0.f,    0.f,   0.f,   0.f,    -2.f,  -2.f},
        {2., 2., 2., 2.},
        {0.1f},
        {3., 3., 3., 3.},
        {0.2f}};
    // Numerical correctness: fused CPU graph must match the reference backend.
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
    // Structural check: the two ConvolutionBias nodes must have been fused
    // into a single one by the horizontal-fusion pass.
    size_t cpu_cb = count_ops_of_type<op::ConvolutionBias>(cpu_f);
    ASSERT_EQ(cpu_cb, 1);
}
// ConvolutionBiasAdd relies on an in-place fused MKLDNN kernel.
// Need to ensure that it is fused only when in-place buffer allocation is feasible
shared_ptr<Function> gen_conv_bias_add(bool param_input, bool result_output)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment