Commit c637d629 authored by gaurides, committed by Robert Kimball

Gauri/groupconv batchnorm (#1900)

* Initial implementation of GroupConv+BatchNorm fusion

* Added GroupConv+BatchNorm with Relu fusion

* Added changes to fuse with BoundedRelu

* Changed BoundedRelu to Relu

* Added test; Code cleanup

* Code formatting

* Removed dead code

* Added test cases and other miscellaneous changes

* Bug fix in group conv callback and general cleanup

* Address PR feedback

* Minor edit to comment. MKLDNN divides both input and output channels by groups

* Style fixes and PR feedback
parent 2a49f1c8
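
Summary of the change: a GroupConvolution followed by an inference-mode BatchNorm (global stats) is folded into a single new GroupConvolutionBias op, and a trailing Relu can be absorbed as an MKLDNN eltwise post-op. A sketch of the folding applied by the new CPUFusion matchers (same symbols as in the callback further down), per output channel:

new_weights = weights * gamma / sqrt(variance + eps)
new_bias    = beta - mean * gamma / sqrt(variance + eps)

For MKLDNN the filters are also reshaped from o,i,h,w to the grouped g,o/g,i/g,h,w (goihw) layout, since MKLDNN divides both input and output channels by the number of groups.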
......@@ -82,6 +82,7 @@ set(SRC
op/batch_norm_relu.cpp
op/bounded_relu.cpp
op/group_conv.cpp
op/group_conv_bias.cpp
op/halide_op.cpp
op/conv_bias.cpp
op/conv_relu.cpp
......
......@@ -19,6 +19,7 @@
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
using namespace std;
using namespace ngraph;
......@@ -51,17 +52,27 @@ namespace ngraph
else if (input_desc.data.format == mkldnn_nchw && input_desc.data.ndims == 4 &&
result_desc.data.ndims == 5 && node->get_users().size() == 1)
{
Shape weights_shape_groups;
if (auto gconv = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(
node->get_users()[0]))
{
weights_shape_groups = gconv->get_weights_dimensions();
}
else if (auto gconvb =
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(
node->get_users()[0]))
{
weights_shape_groups = gconvb->get_weights_dimensions();
}
else
{
throw ngraph_error("Incompatible input/output shape in ConvertLayout op");
}
input_desc = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format::goihw);
}
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc);
......
......@@ -23,6 +23,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
using namespace std;
using namespace ngraph;
......@@ -489,6 +490,81 @@ namespace ngraph
}
}
template <>
void Builder::BUILDER_DECL(ngraph::op::GroupConvolutionBias)
{
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
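// deps[0..3] are the MKLDNN memory primitive indices in the order the primitive
// was built above: input data, weights, bias, output.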
auto functor = [&, conv_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error("unsupported parameters for GroupConvolutionBias");
}
}
REGISTER_OP_BUILDER(Convolution);
REGISTER_OP_BUILDER(ConvolutionRelu);
REGISTER_OP_BUILDER(ConvolutionBias);
......@@ -498,6 +574,7 @@ namespace ngraph
REGISTER_OP_BUILDER(ConvolutionBiasBackpropFiltersBias);
REGISTER_OP_BUILDER(GroupConvolution);
REGISTER_OP_BUILDER(ConvolutionAdd);
REGISTER_OP_BUILDER(GroupConvolutionBias);
}
}
}
......@@ -109,6 +109,7 @@
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
......@@ -2828,6 +2829,77 @@ namespace ngraph
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolutionBias)
{
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
// invoke group convolution bias
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
}
else
{
throw ngraph_error("Unsupported parameters for GroupConvolutionBias");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Convolution)
{
......@@ -3920,17 +3992,27 @@ namespace ngraph
result_desc.data.ndims == 5 /*Goihw16g/Goihw8g/etc*/ &&
node->get_users().size() == 1)
{
Shape weights_shape_groups;
if (auto gconv = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(
node->get_users()[0]))
{
weights_shape_groups = gconv->get_weights_dimensions();
}
else if (auto gconvb =
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(
node->get_users()[0]))
{
weights_shape_groups = gconvb->get_weights_dimensions();
}
else
{
throw ngraph_error("Incompatible input/output shape in ConvertLayout op");
}
input_desc = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format::goihw);
}
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc);
......
......@@ -145,6 +145,7 @@
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
......@@ -360,6 +361,8 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>},
{TI(ngraph::op::Quantize), &runtime::cpu::CPU_Emitter::emit<op::Quantize>},
{TI(ngraph::op::Dequantize), &runtime::cpu::CPU_Emitter::emit<op::Dequantize>},
{TI(ngraph::op::GroupConvolutionBias),
&runtime::cpu::CPU_Emitter::emit<op::GroupConvolutionBias>},
};
......
......@@ -15,6 +15,8 @@
//*****************************************************************************
#include "cpu_visualize_tree.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
......@@ -37,6 +39,38 @@ static void visualize_convert_layout(const Node& node, ostream& ss)
ss << " ";
}
static void visualize_reshape(const Node& node, ostream& ss)
{
try
{
auto input_desc = node.get_inputs().at(0).get_tensor().get_tensor_layout();
auto result_desc = node.get_output_tensor_ptr()->get_tensor_layout();
auto in_tvl = static_pointer_cast<runtime::cpu::LayoutDescriptor>(input_desc);
auto out_tvl = static_pointer_cast<runtime::cpu::LayoutDescriptor>(result_desc);
if (!in_tvl || !out_tvl)
{
return;
}
if (!in_tvl->is_mkldnn_layout() || !out_tvl->is_mkldnn_layout())
{
return;
}
ss << "\nin="
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::format>(in_tvl->get_mkldnn_md().data.format));
ss << " out="
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::format>(out_tvl->get_mkldnn_md().data.format));
ss << " ";
}
catch (...)
{
NGRAPH_DEBUG << "Exception in visualize_reshape \n";
}
}
namespace ngraph
{
namespace runtime
......@@ -46,7 +80,8 @@ namespace ngraph
const visualize_tree_ops_map_t& get_visualize_tree_ops_map()
{
const static visualize_tree_ops_map_t vtom{
{TI(runtime::cpu::op::ConvertLayout), visualize_convert_layout}};
{TI(runtime::cpu::op::ConvertLayout), visualize_convert_layout},
{TI(ngraph::op::Reshape), visualize_reshape}};
return vtom;
}
}
......
......@@ -40,32 +40,6 @@ using namespace mkldnn;
using namespace ngraph;
using namespace std;
#define TI(x) std::type_index(typeid(x))
std::unordered_set<std::type_index>& runtime::cpu::mkldnn_utils::get_op_registry()
{
static std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::Add),
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::BatchNormTraining),
TI(ngraph::op::BatchNormInference),
TI(ngraph::op::BatchNormTrainingBackprop),
TI(ngraph::op::Concat),
TI(ngraph::op::Convolution),
TI(ngraph::op::ConvolutionBackpropData),
TI(ngraph::op::ConvolutionBackpropFilters),
TI(ngraph::op::ConvolutionBias),
TI(ngraph::op::ConvolutionRelu),
TI(ngraph::op::ConvolutionBiasBackpropFiltersBias),
TI(ngraph::op::MaxPool),
TI(ngraph::op::MaxPoolBackprop),
TI(ngraph::op::Relu),
TI(ngraph::op::ReluBackprop),
TI(ngraph::op::Reshape)};
return s_op_registry;
}
std::map<element::Type, const mkldnn::memory::data_type>&
runtime::cpu::mkldnn_utils::get_mkldnn_data_type_map()
{
......@@ -107,7 +81,6 @@ std::map<element::Type, const std::string>&
std::map<memory::format, const std::string>&
runtime::cpu::mkldnn_utils::get_mkldnn_format_string_map()
{
// TODO (jbobba): Add the rest of memory formats to this map as well
static std::map<memory::format, const std::string> s_mkldnn_format_string_map{
{memory::format::format_undef, "memory::format::format_undef"},
{memory::format::any, "memory::format::any"},
......@@ -119,37 +92,78 @@ std::map<memory::format, const std::string>&
{memory::format::chwn, "memory::format::chwn"},
{memory::format::nChw8c, "memory::format::nChw8c"},
{memory::format::nChw16c, "memory::format::nChw16c"},
{memory::format::ncdhw, "memory::format::ndhwc"},
{memory::format::ncdhw, "memory::format::ndhwc"},
{memory::format::ncdhw, "memory::format::ncdhw"},
{memory::format::ndhwc, "memory::format::ndhwc"},
{memory::format::nCdhw8c, "memory::format::nCdhw8c"},
{memory::format::nCdhw16c, "memory::format::nCdhw16c"},
{memory::format::oi, "memory::format::oi"},
{memory::format::io, "memory::format::io"},
{memory::format::oihw, "memory::format::oihw"},
{memory::format::ihwo, "memory::format::ihwo"},
{memory::format::hwio, "memory::format::hwio"},
{memory::format::dhwio, "memory::format::dhwio"},
{memory::format::oidhw, "memory::format::oidhw"},
{memory::format::OIdhw8i8o, "memory::format::OIdhw8i8o"},
{memory::format::OIdhw8o8i, "memory::format::OIdhw8o8i"},
{memory::format::Odhwi8o, "memory::format::Odhwi8o"},
{memory::format::OIdhw16i16o, "memory::format::OIdhw16i16o"},
{memory::format::OIdhw16o16i, "memory::format::OIdhw16o16i"},
{memory::format::Oidhw16o, "memory::format::Oidhw16o"},
{memory::format::Odhwi16o, "memory::format::Odhwi16o"},
{memory::format::oIhw8i, "memory::format::oIhw8i"},
{memory::format::oIhw16i, "memory::format::oIhw16i"},
{memory::format::oIdhw8i, "memory::format::oIdhw8i"},
{memory::format::oIdhw16i, "memory::format::oIdhw16i"},
{memory::format::OIhw8i8o, "memory::format::OIhw8i8o"},
{memory::format::OIhw16i16o, "memory::format::OIhw16i16o"},
{memory::format::IOhw16o16i, "memory::format::IOhw16o16i"},
{memory::format::OIhw8o8i, "memory::format::OIhw8o8i"},
{memory::format::OIhw16o16i, "memory::format::OIhw16o16i"},
{memory::format::OIhw8i16o2i, "memory::format::OIhw8i16o2i"},
{memory::format::OIdhw8i16o2i, "memory::format::OIdhw8i16o2i"},
{memory::format::OIhw8o16i2o, "memory::format::OIhw8o16i2o"},
{memory::format::OIhw4i16o4i, "memory::format::OIhw4i16o4i"},
{memory::format::Oihw8o, "memory::format::Oihw8o"},
{memory::format::Oihw16o, "memory::format::Oihw16o"},
{memory::format::Ohwi8o, "memory::format::Ohwi8o"},
{memory::format::Ohwi16o, "memory::format::Ohwi16o"},
{memory::format::OhIw16o4i, "memory::format::OhIw16o4i"},
{memory::format::goihw, "memory::format::goihw"},
{memory::format::hwigo, "memory::format::hwigo"},
{memory::format::gOIdhw8i8o, "memory::format::gOIdhw8i8o"},
{memory::format::gOIdhw8o8i, "memory::format::gOIdhw8o8i"},
{memory::format::gOdhwi8o, "memory::format::gOdhwi8o"},
{memory::format::gOIhw8i8o, "memory::format::gOIhw8i8o"},
{memory::format::gOIhw16i16o, "memory::format::gOIhw16i16o"},
{memory::format::gOIhw8i16o2i, "memory::format::gOIhw8i16o2i"},
{memory::format::gOIdhw8i16o2i, "memory::format::gOIdhw8i16o2i"},
{memory::format::gOIhw8o16i2o, "memory::format::gOIhw8o16i2o"},
{memory::format::gOIhw4i16o4i, "memory::format::gOIhw4i16o4i"},
{memory::format::gOihw8o, "memory::format::gOihw8o"},
{memory::format::gOihw16o, "memory::format::gOihw16o"},
{memory::format::gOhwi8o, "memory::format::gOhwi8o"},
{memory::format::gOhwi16o, "memory::format::gOhwi16o"},
{memory::format::Goihw8g, "memory::format::Goihw8g"},
{memory::format::Goihw16g, "memory::format::Goihw16g"},
{memory::format::gOIhw8o8i, "memory::format::gOIhw8o8i"},
{memory::format::gOIhw16o16i, "memory::format::gOIhw16o16i"},
{memory::format::gIOhw16o16i, "memory::format::gIOhw16o16i"},
{memory::format::gOhIw16o4i, "memory::format::gOhIw16o4i"},
{memory::format::goidhw, "memory::format::goidhw"},
{memory::format::gOIdhw16i16o, "memory::format::gOIdhw16i16o"},
{memory::format::gOIdhw16o16i, "memory::format::gOIdhw16o16i"},
{memory::format::gOidhw16o, "memory::format::gOidhw16o"},
{memory::format::gOdhwi16o, "memory::format::gOdhwi16o"},
{memory::format::ntc, "memory::format::ntc"},
{memory::format::tnc, "memory::format::tnc"},
{memory::format::ldsnc, "memory::format::ldsnc"},
{memory::format::ldigo, "memory::format::ldigo"},
{memory::format::ldigo_p, "memory::format::ldigo_p"},
{memory::format::ldgoi, "memory::format::ldgoi"},
{memory::format::ldgoi_p, "memory::format::ldgoi_p"},
{memory::format::ldgo, "memory::format::ldgo"},
{memory::format::wino_fmt, "memory::format::wino_fmt"},
{memory::format::format_last, "memory::format::format_last"},
};
return s_mkldnn_format_string_map;
}
......@@ -181,10 +195,6 @@ std::set<memory::format>& runtime::cpu::mkldnn_utils::get_filter_formats()
memory::format::OhIw16o4i};
return s_filter_formats;
}
bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op)
{
return (get_op_registry().find(TI(op)) != get_op_registry().end());
}
mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
const ngraph::runtime::cpu::LayoutDescriptor& layout)
......
......@@ -33,7 +33,6 @@ namespace ngraph
{
extern mkldnn::engine global_cpu_engine;
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
mkldnn::memory::format CreateNativeDataFormat(const Shape& shape);
......@@ -73,7 +72,6 @@ namespace ngraph
bool use_mkldnn_kernel(const ngraph::Node* node);
std::unordered_set<std::type_index>& get_op_registry();
std::map<element::Type, const mkldnn::memory::data_type>&
get_mkldnn_data_type_map();
std::map<element::Type, const std::string>& get_mkldnn_data_type_string_map();
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <numeric>
#include "group_conv.hpp"
#include "group_conv_bias.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
static void validate_groupconvbias_shapes(const Shape& input_shape,
const Shape& filters_shape,
const Shape& bias_shape,
const Shape& output_shape,
size_t groups)
{
// Input - N, C, H, W
// Filter - O, I, H, W
// Output - N, C, H, W
const size_t INPUT_C = 1;
const size_t FILTER_OC = 0;
const size_t FILTER_IC = 1;
const size_t OUTPUT_C = 1;
if (bias_shape.size() != 1)
{
throw ngraph_error("GroupConvolutionBias bias is expected to be 1D, but has shape: " +
vector_to_string(bias_shape));
}
if (bias_shape[0] != filters_shape[FILTER_OC])
{
throw ngraph_error(
"GroupConvolutionBias bias element size does not match number of filters. bias_size "
"= " +
std::to_string(bias_shape[0]) + ", num_filters = " + std::to_string(filters_shape[0]));
}
if (input_shape[INPUT_C] != groups * filters_shape[FILTER_IC])
{
throw ngraph_error(
"Mismatch between GroupConvolutionBias input and filter channels: "
" data channels=" +
std::to_string(input_shape[INPUT_C]) + ", filter channels= " +
std::to_string(filters_shape[FILTER_IC]) + ", groups= " + std::to_string(groups));
}
if (output_shape[OUTPUT_C] != filters_shape[FILTER_OC])
{
throw ngraph_error(
"Mismatch between GroupConvolutionBias output and filter channels: "
" data channels=" +
std::to_string(output_shape[OUTPUT_C]) + ", filter channels= " +
std::to_string(filters_shape[FILTER_OC]));
}
if (output_shape[OUTPUT_C] % groups != 0)
{
throw ngraph_error(
"Output channels for GroupConvolutionBias not divisible by groups: channels=" +
std::to_string(output_shape[OUTPUT_C]) + ", groups= " + std::to_string(groups));
}
}
Shape op::GroupConvolutionBias::get_weights_dimensions()
{
// reshape weights into 5d tensors that includes groups
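// e.g., input {1, 20, 5, 5}, filters {8, 10, 3, 3}, groups = 2 (as in the
// fuse_groupconv_batchnorm1 test) gives {2, 4, 10, 3, 3}:
// {groups, output_channels / groups, input_channels / groups, kernel_h, kernel_w}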
const size_t OC = 0;
const size_t OC_IN_OUTPUT = 1;
const size_t IC = 1;
Shape weights_shape_groups{get_inputs().at(1).get_shape()};
weights_shape_groups.at(OC) = get_shape().at(OC_IN_OUTPUT) / get_groups();
weights_shape_groups.at(IC) = get_inputs().at(0).get_shape().at(IC) / get_groups();
// push_front the number of groups
weights_shape_groups.insert(weights_shape_groups.begin(), get_groups());
return weights_shape_groups;
}
op::GroupConvolutionBias::GroupConvolutionBias(const shared_ptr<op::GroupConvolution>& conv,
const shared_ptr<Node>& bias,
size_t groups,
const Shape& output_shape,
bool with_relu,
float alpha)
: Op("GroupConvolutionBias",
check_single_output_args({conv->get_argument(0), conv->get_argument(1), bias}))
, m_window_movement_strides(conv->get_window_movement_strides())
, m_window_dilation_strides(conv->get_window_dilation_strides())
, m_padding_below(conv->get_padding_below())
, m_padding_above(conv->get_padding_above())
, m_data_dilation_strides(conv->get_data_dilation_strides())
, m_with_relu(with_relu)
, m_groups(groups)
, m_alpha(alpha)
{
constructor_validate_and_infer_types();
if (conv->get_element_type() != bias->get_element_type())
{
throw ngraph_error("GroupConvolution's element type isn't equal to bias!");
}
validate_groupconvbias_shapes(conv->get_argument(0)->get_shape(),
conv->get_argument(1)->get_shape(),
bias->get_shape(),
output_shape,
groups);
set_output_type(0, conv->get_element_type(), output_shape);
}
op::GroupConvolutionBias::GroupConvolutionBias(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
size_t groups,
const Shape& output_shape,
bool with_relu,
float alpha)
: Op("GroupConvolutionBias", check_single_output_args({data_batch, filters, bias}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_with_relu(with_relu)
, m_groups(groups)
, m_alpha(alpha)
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& data_batch_et = data_batch->get_element_type();
auto& filters_shape = filters->get_shape();
auto& filters_et = filters->get_element_type();
//
// Make sure data batch and filter element types match.
//
if (data_batch_et != filters_et)
{
throw ngraph_error("GroupConvolutionBias data batch and filter element types do not match");
}
validate_groupconvbias_shapes(
data_batch_shape, filters_shape, bias->get_shape(), output_shape, groups);
set_output_type(0, data_batch_et, output_shape);
}
shared_ptr<Node> op::GroupConvolutionBias::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 3)
{
throw ngraph_error("Incorrect number of new arguments");
}
return shared_ptr<Node>(new GroupConvolutionBias(new_args.at(0),
new_args.at(1),
new_args.at(2),
get_window_movement_strides(),
get_window_dilation_strides(),
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
get_groups(),
get_output_shape(0),
m_with_relu,
get_alpha()));
}
void op::GroupConvolutionBias::generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas)
{
throw ngraph_error("GroupConvolutionBias generate_adjoints not supported implemented");
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "group_conv.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
/// \brief GroupConvolution + Bias + Relu forward prop for
/// batched GroupConvolution operation.
class GroupConvolutionBias : public Op
{
public:
GroupConvolutionBias(const std::shared_ptr<op::GroupConvolution>& conv,
const std::shared_ptr<Node>& bias,
const size_t groups,
const Shape& output_shape,
bool with_relu,
float alpha = 1.0);
GroupConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
size_t groups,
const Shape& output_shape,
bool with_relu,
float alpha = 1.0);
Shape get_weights_dimensions();
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
size_t get_groups() const { return m_groups; }
bool with_relu() const { return m_with_relu; }
float get_alpha() const { return m_alpha; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
void generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas) override;
protected:
Strides m_window_movement_strides;
Strides m_window_dilation_strides;
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
size_t m_groups = 1;
float m_alpha = 1.0;
};
}
}
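
For orientation, a minimal sketch of building the op directly through the second constructor declared above; the shapes are borrowed from the fuse_groupconv_batchnorm1 test and the snippet assumes the usual ngraph headers:

auto data = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20, 5, 5});
auto filters = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{8, 10, 3, 3});
auto bias = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{8});
auto gcb = std::make_shared<ngraph::op::GroupConvolutionBias>(
    data, filters, bias,
    ngraph::Strides{1, 1},        // window movement strides
    ngraph::Strides{1, 1},        // window dilation strides
    ngraph::CoordinateDiff{0, 0}, // padding below
    ngraph::CoordinateDiff{0, 0}, // padding above
    ngraph::Strides{1, 1},        // data dilation strides
    2,                            // groups
    ngraph::Shape{1, 8, 3, 3},    // output shape
    true);                        // with_relu; alpha defaults to 1.0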
......@@ -52,6 +52,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
......@@ -187,6 +188,20 @@ namespace ngraph
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::GroupConvolutionBias)
{
auto convolution = static_cast<op::GroupConvolutionBias*>(node);
if (can_use_mkldnn_conv<ngraph::op::GroupConvolutionBias>(node))
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
convolution->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionRelu)
{
......@@ -929,6 +944,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionBias>},
{TI(ngraph::op::GroupConvolutionBias),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::GroupConvolutionBias>},
{TI(ngraph::op::Quantize), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Quantize>},
{TI(ngraph::op::Dequantize),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Dequantize>},
......
......@@ -17,6 +17,7 @@
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <unordered_set>
#include "cpu_fusion.hpp"
......@@ -56,6 +57,8 @@
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
#include "ngraph/util.hpp"
......@@ -1636,3 +1639,160 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_conv_bias_affine_folding()
auto m = std::make_shared<ngraph::pattern::Matcher>(multiply, callback);
this->add_matcher(m);
}
void ngraph::runtime::cpu::pass::CPUFusion::construct_groupconv_batchnorm_global_stats_folding()
{
Shape shape_a{1, 32, 2, 2};
Shape shape_b{32, 1, 1, 1};
Shape shape_r{1, 32, 2, 2};
auto input = std::make_shared<pattern::op::Label>(element::f32, shape_a);
auto filters = std::make_shared<pattern::op::Label>(element::f32, shape_b);
auto resShape = std::make_shared<pattern::op::Label>(element::f32, shape_r);
auto conv = std::make_shared<op::GroupConvolution>(input,
filters,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
32,
shape_r);
auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
auto mean = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
auto var = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
auto gamma = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
auto beta = std::make_shared<pattern::op::Label>(element::f32, Shape{32});
double eps = 0.001;
auto bn = std::make_shared<op::BatchNormInference>(eps, gamma, beta, conv_label, mean, var);
ngraph::pattern::graph_rewrite_callback callback =
[input, filters, conv_label, mean, var, gamma, beta, eps](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for groupconv BatchNorm folding against node = "
<< m.get_match_root()->get_name();
auto pattern_map = m.get_pattern_map();
auto m_bn = std::dynamic_pointer_cast<op::BatchNormInference>(m.get_match_root());
auto conv_m = std::static_pointer_cast<op::GroupConvolution>(pattern_map[conv_label]);
if (conv_m->get_users().size() > 1)
{
return false;
}
if (conv_m->get_shape().size() != 4)
{
return false;
}
if (conv_m->get_groups() == 0)
{
return false;
}
// new weights = old weights * gamma / sqrt(variance + epsilon)
// new biases = (-mean) * gamma / sqrt(variance + epsilon) + beta
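// Derivation sketch: inference-mode batch norm computes
//   bn(y) = gamma * (y - mean) / sqrt(variance + epsilon) + beta
// and y = groupconv(x, W) is linear in W, so
//   bn(groupconv(x, W)) = groupconv(x, W * gamma / sqrt(variance + epsilon))
//                         + (beta - mean * gamma / sqrt(variance + epsilon))
// i.e. a GroupConvolutionBias with the scaled weights and the new bias, with
// gamma, mean and variance broadcast along the output-channel axis.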
auto bn_eps = op::Constant::create(element::f32, Shape{}, {m_bn->get_eps_value()});
auto var_eps = std::make_shared<op::Add>(
pattern_map[var],
std::make_shared<op::Broadcast>(bn_eps, pattern_map[var]->get_shape(), AxisSet{0}));
auto sqrt_var_eps = std::make_shared<op::Sqrt>(var_eps);
auto weight_scaling = std::make_shared<op::Divide>(pattern_map[gamma], sqrt_var_eps);
auto weight_scaling_bcast = std::make_shared<op::Broadcast>(
weight_scaling, pattern_map[filters]->get_shape(), AxisSet{1, 2, 3});
auto new_weights =
std::make_shared<op::Multiply>(pattern_map[filters], weight_scaling_bcast);
auto mean_gamma = std::make_shared<op::Multiply>(pattern_map[mean], weight_scaling);
auto new_biases = std::make_shared<op::Subtract>(pattern_map[beta], mean_gamma);
auto g_conv_bias =
std::make_shared<op::GroupConvolutionBias>(pattern_map[input],
new_weights,
new_biases,
conv_m->get_window_movement_strides(),
conv_m->get_window_dilation_strides(),
conv_m->get_padding_below(),
conv_m->get_padding_above(),
conv_m->get_data_dilation_strides(),
conv_m->get_groups(),
conv_m->get_output_shape(0),
false,
1.0);
ngraph::replace_node(m.get_match_root(), g_conv_bias);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(bn, callback);
this->add_matcher(m);
}
void ngraph::runtime::cpu::pass::CPUFusion::
construct_groupconv_batchnorm_global_stats_folding_relu()
{
Shape shape_a{1, 32, 2, 2};
Shape shape_b{32, 1, 1, 1};
Shape shape_r{1, 32, 2, 2};
Shape shape_bias{32};
Shape shape_num{0};
auto input = std::make_shared<pattern::op::Label>(element::f32, shape_a);
auto filters = std::make_shared<pattern::op::Label>(element::f32, shape_b);
auto bias = std::make_shared<pattern::op::Label>(element::f32, shape_bias);
auto num = std::make_shared<pattern::op::Label>(element::f32, shape_num);
auto conv = std::make_shared<op::GroupConvolutionBias>(input,
filters,
bias,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
32,
shape_r,
false,
1.0);
auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
// GroupConvolutionBias (from the GroupConv + BatchNorm folding above) + Relu -> GroupConvolutionBias (with_relu = true)
auto prelu = std::make_shared<op::Relu>(conv_label);
ngraph::pattern::graph_rewrite_callback callback =
[input, filters, bias, num, conv_label, prelu](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for GroupConvBias + Relu folding against node = "
<< m.get_match_root()->get_name();
auto pattern_map = m.get_pattern_map();
auto conv_m =
std::static_pointer_cast<op::GroupConvolutionBias>(pattern_map[conv_label]);
auto relu_m = std::dynamic_pointer_cast<op::Relu>(m.get_match_root());
auto g_conv_bias_relu =
std::make_shared<op::GroupConvolutionBias>(conv_m->get_argument(0),
conv_m->get_argument(1),
conv_m->get_argument(2),
conv_m->get_window_movement_strides(),
conv_m->get_window_dilation_strides(),
conv_m->get_padding_below(),
conv_m->get_padding_above(),
conv_m->get_data_dilation_strides(),
conv_m->get_groups(),
conv_m->get_output_shape(0),
true);
ngraph::replace_node(m.get_match_root(), g_conv_bias_relu);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(prelu, callback);
this->add_matcher(m);
}
......@@ -66,6 +66,8 @@ public:
construct_conv_bias_bprop();
construct_conv_bias_folded_batch_norm();
construct_conv_bias_affine_folding();
construct_groupconv_batchnorm_global_stats_folding();
construct_groupconv_batchnorm_global_stats_folding_relu();
construct_batch_norm_relu();
construct_batch_norm_relu_global_stats();
construct_conv_relu();
......@@ -100,4 +102,6 @@ private:
void construct_bounded_relu();
void construct_conv_bias_folded_batch_norm();
void construct_conv_bias_affine_folding();
void construct_groupconv_batchnorm_global_stats_folding();
void construct_groupconv_batchnorm_global_stats_folding_relu();
};
......@@ -57,6 +57,7 @@
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
......@@ -269,7 +270,7 @@ namespace ngraph
{
namespace pass
{
template <typename T, bool use_bias>
void ConvolutionLayout(std::shared_ptr<ngraph::Node> node,
vector<memory::desc>& i_mds,
vector<memory::desc>& o_mds)
......@@ -279,10 +280,16 @@ namespace ngraph
auto arg0_shape = node->get_input_shape(0);
auto arg1_shape = node->get_input_shape(1);
// Convert filters to MKLDNN shape
// o,i,h,w -> g,o,i,h,w (e.g., {6, 2, 1, 1}, groups = 2 -> {2, 3, 1, 1, 1})
if (auto gconv = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(node))
{
arg1_shape = gconv->get_weights_dimensions();
}
if (auto gconv =
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(node))
{
arg1_shape = gconv->get_weights_dimensions();
}
auto result_shape = node->get_output_shape(0);
auto filter_strides = convolution->get_window_movement_strides();
......@@ -325,8 +332,6 @@ namespace ngraph
memory::data_type et_bias =
mkldnn_utils::get_mkldnn_data_type(node->get_input_element_type(2));
auto arg2_shape = node->get_input_shape(2);
ngraph::op::util::validate_convbias_shapes(
arg0_shape, arg1_shape, arg2_shape);
memory::dims mkldnn_arg2_shape(arg2_shape.begin(), arg2_shape.end());
const memory::desc bias_desc(
mkldnn_arg2_shape, et_bias, memory::format::any);
......@@ -393,7 +398,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::QuantizedConvolution, false>(
node, i_mds, o_mds);
auto scale_input_md = mkldnn_utils::create_default_mkldnn_md(
......@@ -416,8 +421,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::Convolution, false>(node, i_mds, o_mds);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
......@@ -435,7 +439,25 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::GroupConvolution, false>(node, i_mds, o_mds);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
}
else
{
set_native_layouts(external_function, node);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::GroupConvolutionBias)
{
if (mkldnn_utils::use_mkldnn_kernel(node.get()))
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::GroupConvolutionBias, true>(
node, i_mds, o_mds);
node = insert_input_conversions(external_function, node, i_mds);
......@@ -454,8 +476,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::ConvolutionBias, true>(node, i_mds, o_mds);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
}
......@@ -472,7 +493,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::QuantizedConvolutionBias, true>(
node, i_mds, o_mds);
auto scale_input_md = mkldnn_utils::create_default_mkldnn_md(
......@@ -496,8 +517,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::ConvolutionRelu, false>(node, i_mds, o_mds);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
}
......@@ -514,7 +534,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::QuantizedConvolutionRelu, false>(
node, i_mds, o_mds);
auto scale_input_md = mkldnn_utils::create_default_mkldnn_md(
......@@ -538,8 +558,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::ConvolutionBiasAdd, true>(node, i_mds, o_mds);
// Force second input to sum to use the same layout as convolution output
i_mds.push_back(o_mds[0]);
node = insert_input_conversions(external_function, node, i_mds);
......@@ -558,8 +577,7 @@ namespace ngraph
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::ConvolutionAdd, false>(node, i_mds, o_mds);
// Force second input to sum to use the same layout as convolution output
i_mds.push_back(o_mds[0]);
node = insert_input_conversions(external_function, node, i_mds);
......@@ -1949,6 +1967,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionBias>},
{TI(ngraph::op::GroupConvolutionBias),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::GroupConvolutionBias>},
};
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
......
......@@ -55,6 +55,7 @@
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
......@@ -1051,6 +1052,145 @@ TEST(cpu_fusion, conv_add)
EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
}
shared_ptr<Function> gen_groupconv_batchnorm(const bool add_goe,
const bool with_relu,
const Shape shape_in,
const Shape shape_weights,
const Shape shape_out,
const size_t groups)
{
auto input = make_shared<op::Parameter>(element::f32, shape_in);
auto weights = make_shared<op::Parameter>(element::f32, shape_weights);
unsigned long OC = shape_out.at(1);
Shape shape_bn{OC};
auto group_conv = make_shared<op::GroupConvolution>(input,
weights,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
groups,
shape_out);
double eps = 0.001;
auto gamma = std::make_shared<op::Parameter>(element::f32, shape_bn);
auto beta = std::make_shared<op::Parameter>(element::f32, shape_bn);
auto mean = std::make_shared<op::Parameter>(element::f32, shape_bn);
auto var = std::make_shared<op::Parameter>(element::f32, shape_bn);
auto goe_bn = std::make_shared<op::GetOutputElement>(group_conv, 0);
// Adding a GetOutputElement (goe) will stop fusion since the patterns won't expect to see this op
auto bn =
add_goe ? std::make_shared<op::BatchNormInference>(eps, gamma, beta, goe_bn, mean, var)
: std::make_shared<op::BatchNormInference>(eps, gamma, beta, group_conv, mean, var);
if (with_relu)
{
auto prelu = std::make_shared<op::Relu>(bn);
auto f = make_shared<Function>(NodeVector{prelu},
op::ParameterVector{input, weights, gamma, beta, mean, var});
return f;
}
else
{
auto f = make_shared<Function>(NodeVector{bn},
op::ParameterVector{input, weights, gamma, beta, mean, var});
return f;
}
}
void fuse_groupconv_batchnorm_helper(Shape shape_in,
Shape shape_weights,
Shape shape_r,
size_t groups)
{
auto func_fuse =
gen_groupconv_batchnorm(false, false, shape_in, shape_weights, shape_r, groups);
auto func_fuse2 =
gen_groupconv_batchnorm(false, true, shape_in, shape_weights, shape_r, groups);
{
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.run_passes(func_fuse);
ASSERT_EQ(count_ops_of_type<op::GroupConvolutionBias>(func_fuse), 1);
}
{
// test groupconv + batchnorm + relu fusion
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.run_passes(func_fuse2);
ASSERT_EQ(count_ops_of_type<op::GroupConvolutionBias>(func_fuse2), 1);
ASSERT_EQ(count_ops_of_type<op::Relu>(func_fuse2), 0);
}
}
void groupconv_batchnorm_test_val_helper(
const bool with_relu, Shape shape_in, Shape shape_weights, Shape shape_r, size_t groups)
{
shared_ptr<Function> fuse_func =
gen_groupconv_batchnorm(false, with_relu, shape_in, shape_weights, shape_r, groups);
shared_ptr<Function> nofuse_func =
gen_groupconv_batchnorm(true, with_relu, shape_in, shape_weights, shape_r, groups);
test::Uniform<float> rng(1.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : fuse_func->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto fuse_results = execute(fuse_func, args, "CPU");
auto nofuse_results = execute(nofuse_func, args, "CPU");
EXPECT_TRUE(test::all_close(fuse_results.at(0), nofuse_results.at(0)));
}
TEST(cpu_fusion, fuse_groupconv_batchnorm1)
{
Shape shape_in{1, 20, 5, 5};
Shape shape_weights{8, 10, 3, 3};
Shape shape_r{1, 8, 3, 3};
fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 2);
groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 2);
groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 2);
}
TEST(cpu_fusion, fuse_groupconv_batchnorm2)
{
Shape shape_in{1, 20, 5, 5};
Shape shape_weights{5, 4, 3, 3};
Shape shape_r{1, 5, 3, 3};
fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 5);
groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 5);
groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 5);
}
TEST(cpu_fusion, fuse_groupconv_batchnorm3)
{
Shape shape_in{1, 20, 5, 5};
Shape shape_weights{20, 1, 3, 3};
Shape shape_r{1, 20, 3, 3};
fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 20);
groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 20);
groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 20);
}
TEST(cpu_fusion, fuse_groupconv_batchnorm4)
{
Shape shape_in{1, 20, 4, 4};
Shape shape_weights{5, 20, 1, 1};
Shape shape_r{1, 5, 4, 4};
fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 1);
groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 1);
groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 1);
}
std::vector<shared_ptr<runtime::Tensor>> rnn_matrix_fusion_eval(const size_t time_steps,
const Shape& data_shape,
const Shape& weights_shape,
......