Commit 37174c90 authored by gaurides's avatar gaurides Committed by Scott Cyphers

Add conv add fusion (#1526)

* Add conv add fusion

* Updated file permissions and cpu_fusion order

* Formatted code using maint/apply-code-format.sh

* Fixed minor review comments

* Use NODE_VALIDATION_ASSERT instead of throw ngraph_error; upgrade baseline and fix issues

* Some more fixes
parent 36e1de51
...@@ -85,6 +85,7 @@ set(SRC ...@@ -85,6 +85,7 @@ set(SRC
op/max_pool_with_indices.cpp op/max_pool_with_indices.cpp
op/rnn.cpp op/rnn.cpp
op/sigmoid_mul.cpp op/sigmoid_mul.cpp
op/conv_add.cpp
pass/cpu_assignment.cpp pass/cpu_assignment.cpp
pass/cpu_collapse_dims.cpp pass/cpu_collapse_dims.cpp
pass/cpu_concat_inputs.cpp pass/cpu_concat_inputs.cpp
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "ngraph/runtime/cpu/kernel/convolution.hpp" #include "ngraph/runtime/cpu/kernel/convolution.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp" #include "ngraph/runtime/cpu/op/group_conv.hpp"
...@@ -205,6 +206,36 @@ namespace ngraph ...@@ -205,6 +206,36 @@ namespace ngraph
} }
} }
// Builds the direct-execution functor for a ConvolutionAdd node.
//
// The node must be assigned to MKLDNN; otherwise an ngraph_error is thrown.
// The functor binds the tensors of inputs 0 and 1 and of output 0 to the first
// three primitive dependencies and then invokes the convolution primitive.
// NOTE(review): input 2 (the sum operand) gets no memory pointer of its own
// here - presumably it shares the output buffer in place; confirm.
template <>
void Builder::BUILDER_DECL(ngraph::op::ConvolutionAdd)
{
    auto& functors = external_function->get_functors();

    auto& input0_tensor = external_function->get_tensor_data(args[0].get_name());
    auto& input1_tensor = external_function->get_tensor_data(args[1].get_name());
    auto& result_tensor = external_function->get_tensor_data(out[0].get_name());

    // Reject the unsupported case up front so the MKLDNN path reads straight through.
    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
        throw ngraph_error("ConvolutionAdd is only supported with MKLDNN kernel.");
    }

    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
    auto primitive_index =
        mkldnn_emitter->build_convolution<ngraph::op::ConvolutionAdd>(node, args, out);
    auto& primitive_deps = mkldnn_emitter->get_primitive_deps(primitive_index);

    // Tensor slots and dependency list are captured by reference; the primitive
    // index is captured by value.
    auto functor = [&, primitive_index](CPURuntimeContext* ctx) {
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[0], input0_tensor);
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[1], input1_tensor);
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[2], result_tensor);
        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, primitive_index);
    };
    functors.emplace_back(functor);
}
template <> template <>
void Builder::BUILDER_DECL(ngraph::op::ConvolutionBackpropData) void Builder::BUILDER_DECL(ngraph::op::ConvolutionBackpropData)
{ {
...@@ -516,6 +547,7 @@ namespace ngraph ...@@ -516,6 +547,7 @@ namespace ngraph
REGISTER_OP_BUILDER(ConvolutionBackpropFilters); REGISTER_OP_BUILDER(ConvolutionBackpropFilters);
REGISTER_OP_BUILDER(ConvolutionBiasBackpropFiltersBias); REGISTER_OP_BUILDER(ConvolutionBiasBackpropFiltersBias);
REGISTER_OP_BUILDER(GroupConvolution); REGISTER_OP_BUILDER(GroupConvolution);
REGISTER_OP_BUILDER(ConvolutionAdd);
} }
} }
} }
...@@ -99,6 +99,7 @@ ...@@ -99,6 +99,7 @@
#include "ngraph/runtime/cpu/op/batch_dot.hpp" #include "ngraph/runtime/cpu/op/batch_dot.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
...@@ -2960,6 +2961,31 @@ namespace ngraph ...@@ -2960,6 +2961,31 @@ namespace ngraph
} }
} }
// Emits the generated-code call sequence for a ConvolutionAdd node.
//
// The node must be assigned to MKLDNN; otherwise an ngraph_error is thrown.
// The emitted code binds inputs 0 and 1 and output 0 to the first three
// primitive dependencies, then invokes the convolution primitive.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionAdd)
{
    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
        throw ngraph_error("ConvolutionAdd is only supported with MKLDNN kernel.");
    }

    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
    auto primitive_index =
        mkldnn_emitter->build_convolution<ngraph::op::ConvolutionAdd>(node, args, out);
    auto& primitive_deps = mkldnn_emitter->get_primitive_deps(primitive_index);

    // One set_memory_ptr line per bound tensor: input 0, input 1, output 0.
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[0]) << ", "
           << args[0].get_name() << ");\n";
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[1]) << ", "
           << args[1].get_name() << ");\n";
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[2]) << ", "
           << out[0].get_name() << ");\n";

    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, " << to_string(primitive_index)
           << ");\n";
}
template <> template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBiasBackpropFiltersBias) void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBiasBackpropFiltersBias)
{ {
......
...@@ -139,6 +139,7 @@ ...@@ -139,6 +139,7 @@
#include "ngraph/runtime/cpu/op/batch_dot.hpp" #include "ngraph/runtime/cpu/op/batch_dot.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
...@@ -343,6 +344,8 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -343,6 +344,8 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::runtime::cpu::op::LoopKernel), {TI(ngraph::runtime::cpu::op::LoopKernel),
&runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::LoopKernel>}, &runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::LoopKernel>},
{TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>}, {TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>},
{TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>},
}; };
static void static void
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
...@@ -132,7 +133,8 @@ namespace ngraph ...@@ -132,7 +133,8 @@ namespace ngraph
mkldnn::post_ops ops; mkldnn::post_ops ops;
if (std::is_same<OP, ngraph::op::ConvolutionBiasAdd>()) if (std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionAdd>())
{ {
ops.append_sum(1.f); ops.append_sum(1.f);
} }
...@@ -148,6 +150,11 @@ namespace ngraph ...@@ -148,6 +150,11 @@ namespace ngraph
return (dynamic_cast<const ngraph::op::ConvolutionBiasAdd*>(node)) return (dynamic_cast<const ngraph::op::ConvolutionBiasAdd*>(node))
->with_relu(); ->with_relu();
} }
if (dynamic_cast<const ngraph::op::ConvolutionAdd*>(node))
{
return (dynamic_cast<const ngraph::op::ConvolutionAdd*>(node))
->with_relu();
}
if (dynamic_cast<const ngraph::op::ConvolutionRelu*>(node)) if (dynamic_cast<const ngraph::op::ConvolutionRelu*>(node))
{ {
return true; return true;
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <numeric>
#include "conv_add.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
void op::util::validate_conv_shapes(const Node* node,
const Shape& data_shape,
const Shape& filters_shape)
{
NODE_VALIDATION_ASSERT(node, data_shape[1] == filters_shape[1])
<< "Number of channels for data and filters do not match (data num channels: "
<< data_shape[1] << ", filters num channels: " << filters_shape[1] << ").";
}
// Builds a ConvolutionAdd from an existing Convolution node.
//
// conv      - convolution whose two arguments and attributes (strides,
//             dilations, padding) are copied into this node.
// sum_input - third argument of the fused node.
// with_relu - stored as-is and reported by with_relu().
//
// The output takes the element type and shape of `conv`.
op::ConvolutionAdd::ConvolutionAdd(const std::shared_ptr<op::Convolution>& conv,
                                   const std::shared_ptr<Node>& sum_input,
                                   bool with_relu)
    : Op("ConvolutionAdd",
         check_single_output_args({conv->get_argument(0), conv->get_argument(1), sum_input}))
    , m_window_movement_strides(conv->get_window_movement_strides())
    , m_window_dilation_strides(conv->get_window_dilation_strides())
    , m_padding_below(conv->get_padding_below())
    , m_padding_above(conv->get_padding_above())
    , m_data_dilation_strides(conv->get_data_dilation_strides())
    , m_with_relu(with_relu)
{
    constructor_validate_and_infer_types();

    // Check channel agreement between the convolution's own two arguments.
    const auto conv_data = conv->get_argument(0);
    const auto conv_filters = conv->get_argument(1);
    util::validate_conv_shapes(this, conv_data->get_shape(), conv_filters->get_shape());

    set_output_type(0, conv->get_element_type(), conv->get_shape());
}
// Builds a ConvolutionAdd from explicit inputs and convolution attributes.
//
// data_batch, filters, sum_input - the three arguments of the node, in order.
// window_movement_strides, window_dilation_strides, padding_below,
// padding_above, data_dilation_strides - stored and forwarded to the
//             convolution output-shape inference.
// with_relu - stored as-is and reported by with_relu().
//
// Asserts that data_batch and filters share an element type and agree on the
// channel count. The output element type is that of data_batch and the output
// shape comes from util::infer_convolution_output_shape.
op::ConvolutionAdd::ConvolutionAdd(const std::shared_ptr<Node>& data_batch,
                                   const std::shared_ptr<Node>& filters,
                                   const std::shared_ptr<Node>& sum_input,
                                   const Strides& window_movement_strides,
                                   const Strides& window_dilation_strides,
                                   const CoordinateDiff& padding_below,
                                   const CoordinateDiff& padding_above,
                                   const Strides& data_dilation_strides,
                                   bool with_relu)
    : Op("ConvolutionAdd", check_single_output_args({data_batch, filters, sum_input}))
    , m_window_movement_strides(window_movement_strides)
    , m_window_dilation_strides(window_dilation_strides)
    , m_padding_below(padding_below)
    , m_padding_above(padding_above)
    , m_data_dilation_strides(data_dilation_strides)
    , m_with_relu(with_relu)
{
    constructor_validate_and_infer_types();

    auto& data_shape = data_batch->get_shape();
    auto& data_et = data_batch->get_element_type();
    auto& filter_shape = filters->get_shape();
    auto& filter_et = filters->get_element_type();

    // The data batch and the filters must use the same element type.
    NODE_VALIDATION_ASSERT(this, data_et == filter_et)
        << "Element types for data_batch and filters do not match (data batch element type: "
        << data_et << ", filters element type: " << filter_et << ").";

    util::validate_conv_shapes(this, data_shape, filter_shape);

    // Axis arguments, in order: batch_axis_data, input_channel_axis_data,
    // input_channel_axis_filters, output_channel_axis_filters,
    // batch_axis_result, output_channel_axis_result.
    set_output_type(0,
                    data_et,
                    util::infer_convolution_output_shape(this,
                                                         data_shape,
                                                         filter_shape,
                                                         window_movement_strides,
                                                         window_dilation_strides,
                                                         padding_below,
                                                         padding_above,
                                                         data_dilation_strides,
                                                         0,
                                                         1,
                                                         1,
                                                         0,
                                                         0,
                                                         1));
}
std::shared_ptr<Node> op::ConvolutionAdd::copy_with_new_args(const NodeVector& new_args) const
{
NODE_VALIDATION_ASSERT(this, new_args.size() != 3)
<< "New arg size is not 3 (new args size: " << new_args.size() << ").";
return std::shared_ptr<Node>(new ConvolutionAdd(new_args.at(0),
new_args.at(1),
new_args.at(2),
get_window_movement_strides(),
get_window_dilation_strides(),
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
m_with_relu));
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
class ConvolutionAdd : public Op
{
public:
ConvolutionAdd(const std::shared_ptr<op::Convolution>& conv,
const std::shared_ptr<Node>& sum_input,
bool with_relu);
ConvolutionAdd(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
bool with_relu);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
protected:
Strides m_window_movement_strides;
Strides m_window_dilation_strides;
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
};
namespace util
{
    /// \brief Asserts, on behalf of \p node, that the data and filters shapes
    ///        have matching extents on axis 1 (the channel count).
    /// \param node Node the validation assertion is reported against.
    /// \param data_shape Shape of the data batch.
    /// \param filters_shape Shape of the filters.
    void validate_conv_shapes(const Node* node, const Shape& data_shape, const Shape& filters_shape);
}
}
}
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp" #include "ngraph/runtime/cpu/op/dequantize.hpp"
...@@ -220,6 +221,33 @@ namespace ngraph ...@@ -220,6 +221,33 @@ namespace ngraph
} }
} }
// Decides whether a ConvolutionAdd node is assigned to MKLDNN.
//
// The node is annotated as an MKLDNN op only when no data dilation stride
// differs from 1, inputs 0 and 1 are both rank 4, and input 0 has element
// type f32. The annotation also registers an in-place pair between output 0
// and input 2. When the conditions fail, no annotation is set.
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionAdd)
{
    auto conv_add = static_cast<op::ConvolutionAdd*>(node);

    auto data_rank = node->get_input_shape(0).size();
    auto filters_rank = node->get_input_shape(1).size();

    // Any stride other than 1 means the data is dilated.
    bool has_data_dilation = false;
    for (size_t stride : conv_add->get_data_dilation_strides())
    {
        if (stride != 1)
        {
            has_data_dilation = true;
        }
    }

    const bool ranks_supported = (data_rank == 4 && filters_rank == 4);
    if (has_data_dilation || !ranks_supported ||
        !(node->get_input_element_type(0) == element::f32))
    {
        return;
    }

    auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    op_annotations->set_mkldnn_op(true);

    const int ADD_INPUT = 2;
    // Accumulates conv into the second input of the unfused add
    op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
    conv_add->set_op_annotations(op_annotations);
}
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::BatchNormRelu) void CPUAssignment::ASSIGN_DECL(ngraph::op::BatchNormRelu)
{ {
...@@ -727,6 +755,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{ ...@@ -727,6 +755,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Lstm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Lstm>}, {TI(ngraph::op::Lstm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Lstm>},
{TI(ngraph::op::Rnn), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Rnn>}, {TI(ngraph::op::Rnn), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Rnn>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>}, {TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>},
{TI(ngraph::op::ConvolutionAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionAdd>},
{TI(ngraph::op::Dequantize), {TI(ngraph::op::Dequantize),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Dequantize>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Dequantize>},
}; };
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "ngraph/pattern/op/skip.hpp" #include "ngraph/pattern/op/skip.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp" #include "ngraph/runtime/cpu/op/matmul_bias.hpp"
...@@ -993,6 +994,143 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu() ...@@ -993,6 +994,143 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_relu()
this->add_matcher(m); this->add_matcher(m);
} }
// Registers the "conv_add" matcher, which rewrites Add(x, Convolution(...))
// (with the convolution on either side) into a single op::ConvolutionAdd
// built with with_relu = false.
//
// The callback declines to fuse (returns false) when:
//   - neither input of the add is a Convolution,
//   - any data dilation stride of the convolution differs from 1,
//   - the convolution's element type is not f32,
//   - either convolution input is not rank 4,
//   - the convolution has more than one user,
//   - the other add input is not post-dominated by the add, or
//   - the other add input is a parameter.
void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add()
{
    Shape shape{2, 2, 1, 1};
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);

    auto pconv = std::make_shared<op::Convolution>(data_batch,
                                                   filters,
                                                   Strides{1, 1},
                                                   Strides{1, 1},
                                                   CoordinateDiff{0, 0},
                                                   CoordinateDiff{0, 0},
                                                   Strides{1, 1});
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, pconv->get_shape());
    auto padd = std::make_shared<op::Add>(add_input, pconv);

    // NOTE(review): data_batch and filters are captured but never referenced in
    // the callback body; the capture list is left unchanged - confirm whether
    // it can be dropped.
    pattern::graph_rewrite_callback callback = [data_batch, filters](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_add against "
                     << m.get_match_root()->get_name();

        auto add_m = m.get_match_root();

        // The convolution may be on either side of the add; the other side is
        // the operand the fused op takes as its sum input.
        auto conv_m = std::dynamic_pointer_cast<op::Convolution>(add_m->get_argument(1));
        auto inplace_input = add_m->get_argument(0);
        if (!conv_m)
        {
            conv_m = std::dynamic_pointer_cast<op::Convolution>(add_m->get_argument(0));
            inplace_input = add_m->get_argument(1);
        }

        // Both casts can fail; conv_m is dereferenced below, so bail out here.
        if (!conv_m)
        {
            NGRAPH_DEBUG << "Neither input of add is a Convolution";
            return false;
        }

        //These checks are to make sure a MKLDNN Convolution kernel can be used.
        bool data_dilated = false;
        for (size_t s : conv_m->get_data_dilation_strides())
        {
            data_dilated = data_dilated || (s != 1);
        }

        if (data_dilated)
        {
            NGRAPH_DEBUG << "Convolution has dilations greater than 1";
            return false;
        }

        if (conv_m->get_element_type() != element::f32)
        {
            NGRAPH_DEBUG << "Convolution isn't of type float";
            return false;
        }

        auto arg0_rank = conv_m->get_input_shape(0).size();
        auto arg1_rank = conv_m->get_input_shape(1).size();

        if (arg0_rank != 4 || arg1_rank != 4)
        {
            NGRAPH_DEBUG << "Convolution's arguments ranks aren't equal to 4";
            return false;
        }

        if (get_user_count(conv_m.get()) > 1)
        {
            NGRAPH_DEBUG << "Convolution has more than one user";
            return false;
        }

        if (!is_post_dominated(inplace_input.get(), add_m.get()))
        {
            NGRAPH_DEBUG << "Unsafe to use in-place kernel since add's in-place input has "
                            "potential live users";
            return false;
        }

        if (inplace_input->is_parameter())
        {
            NGRAPH_DEBUG
                << "Unsafe to use in-place kernel since add's in-place input is a parameter";
            return false;
        }

        std::shared_ptr<Node> conv_add =
            std::make_shared<op::ConvolutionAdd>(conv_m, inplace_input, false);
        ngraph::replace_node(add_m, conv_add);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(padd, callback, "conv_add");
    this->add_matcher(m);
}
// Registers the "conv_add_relu" matcher, which rewrites Relu(ConvolutionAdd(...))
// into a new ConvolutionAdd with the same arguments and attributes but
// with_relu = true. The rewrite is skipped when the matched ConvolutionAdd
// has more than one user.
void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_add_relu()
{
    Shape shape{2, 2, 1, 1};
    auto data_batch = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::f32, shape);
    auto add_input = std::make_shared<pattern::op::Label>(element::f32, shape);

    auto pconv = std::make_shared<op::ConvolutionAdd>(data_batch,
                                                      filters,
                                                      add_input,
                                                      Strides{1, 1},
                                                      Strides{1, 1},
                                                      CoordinateDiff{0, 0},
                                                      CoordinateDiff{0, 0},
                                                      Strides{1, 1},
                                                      false);
    auto prelu = std::make_shared<op::Relu>(pconv);

    pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_conv_add_relu against "
                     << m.get_match_root()->get_name();

        auto relu_m = m.get_match_root();
        auto fused_m = std::dynamic_pointer_cast<op::ConvolutionAdd>(relu_m->get_argument(0));

        // Fold the relu in only when it is the sole consumer.
        const bool single_user = !(fused_m->get_users().size() > 1);
        if (!single_user)
        {
            NGRAPH_DEBUG << "Convolution has more than one user";
            return false;
        }

        // ConvolutionAdd created only if it can run with MKLDNN.
        // No further checks needed.
        auto fused_with_relu =
            std::make_shared<op::ConvolutionAdd>(fused_m->get_argument(0),
                                                 fused_m->get_argument(1),
                                                 fused_m->get_argument(2),
                                                 fused_m->get_window_movement_strides(),
                                                 fused_m->get_window_dilation_strides(),
                                                 fused_m->get_padding_below(),
                                                 fused_m->get_padding_above(),
                                                 fused_m->get_data_dilation_strides(),
                                                 true);
        ngraph::replace_node(relu_m, fused_with_relu);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(prelu, callback, "conv_add_relu");
    this->add_matcher(m);
}
void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add() void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
{ {
Shape shape{2, 2, 1, 1}; Shape shape{2, 2, 1, 1};
...@@ -1074,17 +1212,6 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add() ...@@ -1074,17 +1212,6 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_add()
return false; return false;
} }
for (auto add_user : m.get_match_root()->get_users())
{
if (add_user->is_output())
{
// TODO: Remove restriction once we handle this case in codegen
NGRAPH_DEBUG
<< "Unsafe to use in-place kernel since add's in-place output is a result";
return false;
}
}
auto conv_add = auto conv_add =
std::shared_ptr<Node>(new op::ConvolutionBiasAdd(conv_m, inplace_input, false)); std::shared_ptr<Node>(new op::ConvolutionBiasAdd(conv_m, inplace_input, false));
ngraph::replace_node(m.get_match_root(), conv_add); ngraph::replace_node(m.get_match_root(), conv_add);
......
...@@ -49,6 +49,12 @@ public: ...@@ -49,6 +49,12 @@ public:
CPUFusion(int fusions = ALL) CPUFusion(int fusions = ALL)
: GraphRewrite() : GraphRewrite()
{ {
if (fusions & DIFFERENTIABLE_FUSIONS)
{
construct_conv_bias();
construct_sigmoid_multiply();
}
if (fusions & REGULAR_FUSIONS) if (fusions & REGULAR_FUSIONS)
{ {
construct_matmul(); construct_matmul();
...@@ -65,12 +71,9 @@ public: ...@@ -65,12 +71,9 @@ public:
construct_conv_bias_add(); construct_conv_bias_add();
construct_conv_bias_add_relu(); construct_conv_bias_add_relu();
construct_bounded_relu(); construct_bounded_relu();
} // construct_conv_add() should always be after construct_conv_bias()
construct_conv_add();
if (fusions & DIFFERENTIABLE_FUSIONS) construct_conv_add_relu();
{
construct_conv_bias();
construct_sigmoid_multiply();
} }
} }
...@@ -90,5 +93,7 @@ private: ...@@ -90,5 +93,7 @@ private:
void construct_conv_bias_relu(); void construct_conv_bias_relu();
void construct_conv_bias_add(); void construct_conv_bias_add();
void construct_conv_bias_add_relu(); void construct_conv_bias_add_relu();
void construct_conv_add();
void construct_conv_add_relu();
void construct_bounded_relu(); void construct_bounded_relu();
}; };
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
...@@ -475,6 +476,26 @@ namespace ngraph ...@@ -475,6 +476,26 @@ namespace ngraph
} }
} }
// Assigns MKLDNN layouts to a ConvolutionAdd node.
//
// The node must be assigned to MKLDNN; otherwise an ngraph_error is thrown.
// The input and output descriptors come from ConvolutionLayout. The output
// descriptor is then appended as one more input descriptor, so the sum input
// is converted to the same layout as the convolution output.
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::ConvolutionAdd)
{
    if (!mkldnn_utils::use_mkldnn_kernel(node.get()))
    {
        throw ngraph_error("ConvolutionAdd only supported in MKLDNN for now");
    }

    vector<memory::desc> input_descs;
    vector<memory::desc> output_descs;
    ConvolutionLayout<ngraph::op::ConvolutionAdd, false, false>(node, input_descs, output_descs);

    // Force second input to sum to use the same layout as convolution output
    input_descs.push_back(output_descs[0]);

    node = insert_input_conversions(external_function, node, input_descs);
    set_output_layouts(node, output_descs);
}
template <> template <>
void CPULayout::LAYOUT_DECL(ngraph::op::ConvolutionBackpropData) void CPULayout::LAYOUT_DECL(ngraph::op::ConvolutionBackpropData)
{ {
...@@ -1608,6 +1629,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{ ...@@ -1608,6 +1629,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Rnn), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Rnn>}, {TI(ngraph::op::Rnn), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Rnn>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Softmax>}, {TI(ngraph::op::Softmax), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Softmax>},
{TI(ngraph::op::BoundedRelu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::BoundedRelu>}, {TI(ngraph::op::BoundedRelu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::BoundedRelu>},
{TI(ngraph::op::ConvolutionAdd),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionAdd>},
{TI(ngraph::op::Dequantize), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Dequantize>}, {TI(ngraph::op::Dequantize), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Dequantize>},
}; };
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "ngraph/runtime/cpu/op/batch_dot.hpp" #include "ngraph/runtime/cpu/op/batch_dot.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
...@@ -924,7 +925,7 @@ TEST(cpu_fusion, fuse_conv_bias_add) ...@@ -924,7 +925,7 @@ TEST(cpu_fusion, fuse_conv_bias_add)
ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasAdd>(func_nofuse1), 0); ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasAdd>(func_nofuse1), 0);
pass_manager.run_passes(func_nofuse2); pass_manager.run_passes(func_nofuse2);
ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasAdd>(func_nofuse2), 0); ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasAdd>(func_nofuse2), 1);
} }
TEST(cpu_fusion, conv_bias_add) TEST(cpu_fusion, conv_bias_add)
...@@ -942,6 +943,61 @@ TEST(cpu_fusion, conv_bias_add) ...@@ -942,6 +943,61 @@ TEST(cpu_fusion, conv_bias_add)
EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
} }
// ConvolutionAdd relies on an in-place fused MKLDNN kernel, so it must be
// fused only when in-place buffer allocation is feasible.
//
// Builds a test function computing Add(Convolution(A, weights), rhs).
//   param_input   - when true, rhs is the parameter B itself; otherwise Abs(B).
//   result_output - when true, the add is the function result; otherwise
//                   Abs(add) is.
shared_ptr<Function> gen_conv_add(bool param_input, bool result_output)
{
    auto A = make_shared<op::Parameter>(element::f32, Shape{2, 1, 2, 2});
    auto weights = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});
    auto conv = make_shared<op::Convolution>(A, weights, Strides{1, 1}, Strides{1, 1});
    auto B = make_shared<op::Parameter>(element::f32, Shape{2, 1, 2, 2});

    // Abs(B) is always created, even when the add consumes B directly.
    auto abs_B = make_shared<op::Abs>(B);

    shared_ptr<op::Add> add;
    if (param_input)
    {
        add = make_shared<op::Add>(conv, B);
    }
    else
    {
        add = make_shared<op::Add>(conv, abs_B);
    }

    // Likewise Abs(add) is always created, even when the add is the result.
    auto abs = make_shared<op::Abs>(add);

    if (result_output)
    {
        return make_shared<Function>(add, op::ParameterVector{A, weights, B});
    }
    return make_shared<Function>(abs, op::ParameterVector{A, weights, B});
}
// Checks when CPUFusion produces a ConvolutionAdd:
//   - non-parameter add input, add not the result -> one fused op,
//   - parameter add input                          -> no fused op,
//   - add is the function result                   -> one fused op.
TEST(cpu_fusion, fuse_conv_add)
{
    auto fusable = gen_conv_add(false, false);
    auto param_add_input = gen_conv_add(true, false);
    auto add_is_result = gen_conv_add(false, true);

    pass::Manager fusion_passes;
    fusion_passes.register_pass<runtime::cpu::pass::CPUFusion>();

    fusion_passes.run_passes(fusable);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionAdd>(fusable), 1);

    fusion_passes.run_passes(param_add_input);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionAdd>(param_add_input), 0);

    fusion_passes.run_passes(add_is_result);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionAdd>(add_is_result), 1);
}
// Compares CPU results against INTERPRETER results for the conv+add graph,
// first with Abs(add) as the function result and then with the add itself
// as the result.
TEST(cpu_fusion, conv_add)
{
    vector<vector<float>> args{{1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f},
                               {-1.25f},
                               {1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f}};

    for (bool result_output : {false, true})
    {
        auto int_f = gen_conv_add(false, result_output);
        auto cpu_f = gen_conv_add(false, result_output);

        auto int_results = execute(int_f, args, "INTERPRETER");
        auto cpu_results = execute(cpu_f, args, "CPU");
        EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
    }
}
std::vector<shared_ptr<runtime::TensorView>> std::vector<shared_ptr<runtime::TensorView>>
rnn_matrix_fusion_eval(const size_t time_steps, rnn_matrix_fusion_eval(const size_t time_steps,
const Shape& data_shape, const Shape& data_shape,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment