Commit 4a2c3c9c authored by Nick Korovaiko, committed by Robert Kimball

Group Convolution (#1041)

*  group conv init

* add GroupConvolution op; refine checks in fusion logic

* add an emitter, cpu assignment

* cpu_layout

* add checks to algebraic simplification

* updating emitter logic for groupconvolution

* working before refactoring

* moving primitive creation logic to mkldnn_emitter

* group convolution graph test

* rename an opt

* address jbobba's feedback
parent 40069d27
......@@ -37,6 +37,7 @@ set(SRC
mkldnn_utils.cpp
op/batch_dot.cpp
op/batch_norm_relu.cpp
op/group_conv.cpp
op/conv_bias.cpp
op/conv_relu.cpp
op/convert_layout.cpp
......
......@@ -97,6 +97,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
......@@ -2604,6 +2605,126 @@ namespace ngraph
}
}
// Emits the generated-code call sequence for a GroupConvolution node.
//
// Only the MKLDNN path is implemented: when the node is not marked to use an
// MKLDNN kernel an ngraph_error is thrown. On the MKLDNN path two primitives are
// built at compile time (a weights reorder followed by a forward convolution)
// and the emitted code binds their memory pointers and invokes them in that order.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolution)
{
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
// NOTE(review): the three shape locals below are not read anywhere in this function.
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
// Each window dilation stride is passed to MKLDNN decremented by one;
// presumably MKLDNN encodes "no dilation" as 0 rather than 1 - confirm.
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto input_format =
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0);
auto output_format =
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0);
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc =
mkldnn_emitter->build_memory_descriptor(args[0], input_format);
// Weights shape with the group count prepended (see get_weights_dimensions()).
Shape weights_shape_groups = convolution->get_weights_dimensions();
// Descriptor with format::any, used only to ask MKLDNN which weights layout it prefers.
auto weights_desc_any = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[1].get_element_type()),
mkldnn::memory::format::any);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
auto result_desc =
mkldnn_emitter->build_memory_descriptor(out[0], output_format);
auto weights_optimized_format =
mkldnn_emitter->query_convolution_forward_weight_format(
input_data_desc,
weights_desc_any,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
// Create a workspace that holds the weights after their layout has been converted;
// it is sized as element count times element size of the weights argument.
auto ws = std::unique_ptr<MKLDNNWorkspace>(new MKLDNNWorkspace(
shape_size(args[1].get_shape()) * args[1].get_element_type().size()));
auto ws_buf_index = mkldnn_emitter->insert_workspace(ws);
// Descriptors for the reorder operation: source is described as goihw,
// destination uses the layout returned by the query above.
auto input_reorder_desc =
mkldnn_emitter->build_memory_descriptor(weights_shape_groups,
args[1].get_element_type(),
mkldnn::memory::format::goihw);
auto result_reorder_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape_groups, args[1].get_element_type(), weights_optimized_format);
// Weights descriptor handed to the convolution itself, in the optimized layout.
auto weights_desc = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[1].get_element_type()),
weights_optimized_format);
// Returns {reorder primitive index, convolution primitive index}.
auto prim_indices = mkldnn_emitter->build_group_convolution_forward(
input_reorder_desc, // weights, as supplied by the caller
input_data_desc,
weights_desc,
result_reorder_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
padding_below,
padding_above);
// Emit code that invokes the reorder primitive: weights argument -> workspace.
{
size_t reorder_index = prim_indices.first;
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", "
<< "ctx->mkldnn_workspaces[" << ws_buf_index << "]);\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(reorder_index) << ");\n";
}
// Emit code that invokes the group convolution: data + reordered weights -> output.
{
size_t conv_index = prim_indices.second;
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", "
<< "ctx->mkldnn_workspaces[" << ws_buf_index << "]);\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
}
}
else
{
// No non-MKLDNN fallback exists for this op.
throw ngraph_error("unsupported parameters for GroupConvolution");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Convolution)
{
......
......@@ -123,6 +123,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
......@@ -261,6 +262,7 @@ static const runtime::cpu::OpMap dispatcher{
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropData>},
{TI(ngraph::op::GroupConvolution), &runtime::cpu::CPU_Emitter::emit<op::GroupConvolution>},
{TI(ngraph::op::ConvolutionBias), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBias>},
{TI(ngraph::op::ConvolutionRelu), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionRelu>},
{TI(ngraph::op::ConvolutionBiasRelu),
......@@ -342,7 +344,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<runtime::cpu::pass::LSTMFusion>();
pass_manager.register_pass<runtime::cpu::pass::RNNFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchDotFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchFusion>();
pass_manager.register_pass<runtime::cpu::pass::ConcatInputs>();
pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
......
......@@ -102,6 +102,63 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
new mkldnn::memory({desc, mkldnn_utils::global_cpu_engine}, reinterpret_cast<void*>(0x42)));
}
// Builds a throw-away direct forward-convolution primitive descriptor from the
// given descriptors and parameters, and reports the weights memory format that
// MKLDNN selected for it. No primitive is registered with the emitter.
//
// weights_desc_any is expected to leave the layout open so that MKLDNN can pick one.
mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format(
    const mkldnn::memory::desc& input_data_desc,
    const mkldnn::memory::desc& weights_desc_any,
    const mkldnn::memory::desc& result_desc,
    const ngraph::Strides& filter_strides,
    const ngraph::Strides& window_dilation_strides_adjusted,
    const ngraph::CoordinateDiff& padding_below,
    const ngraph::CoordinateDiff& padding_above)
{
    using dims = mkldnn::memory::dims;

    // Convert the nGraph containers into MKLDNN dimension vectors.
    dims strides(filter_strides.begin(), filter_strides.end());
    dims dilations(window_dilation_strides_adjusted.begin(),
                   window_dilation_strides_adjusted.end());
    dims pad_lo(padding_below.begin(), padding_below.end());
    dims pad_hi(padding_above.begin(), padding_above.end());

    // The query runs against a locally constructed CPU engine (index 0).
    mkldnn::engine query_engine(mkldnn::engine::cpu, 0);

    mkldnn::convolution_forward::desc query_desc(mkldnn::prop_kind::forward,
                                                 mkldnn::algorithm::convolution_direct,
                                                 input_data_desc,
                                                 weights_desc_any, // layout left open
                                                 result_desc,
                                                 strides,
                                                 dilations,
                                                 pad_lo,
                                                 pad_hi,
                                                 mkldnn::padding_kind::zero);

    mkldnn::convolution_forward::primitive_desc query_prim_desc(query_desc, query_engine);

    // The chosen layout is stored as a raw enum in the C descriptor; cast it back.
    auto chosen_format = query_prim_desc.weights_primitive_desc().desc().data.format;
    return static_cast<mkldnn::memory::format>(chosen_format);
}
// Registers the two primitives a group convolution needs and returns their
// indices as {reorder, convolution}.
//
// The reorder converts memory described by input_reorder_desc into
// result_reorder_desc; the convolution is built from input_conv_desc,
// weights_desc and result_desc with the given strides, dilation and padding.
std::pair<size_t, size_t> MKLDNNEmitter::build_group_convolution_forward(
    const mkldnn::memory::desc& input_reorder_desc,
    const mkldnn::memory::desc& input_conv_desc,
    const mkldnn::memory::desc& weights_desc,
    const mkldnn::memory::desc& result_reorder_desc,
    const mkldnn::memory::desc& result_desc,
    const ngraph::Strides& filter_strides,
    const ngraph::Strides& window_dilation_strides_adjusted,
    const ngraph::CoordinateDiff& padding_below,
    const ngraph::CoordinateDiff& padding_above)
{
    // The reorder is registered before the convolution; keep this order so the
    // primitive indices handed out stay the same.
    const size_t weights_reorder = build_reorder(input_reorder_desc, result_reorder_desc);

    const size_t convolution = build_convolution_forward(input_conv_desc,
                                                         weights_desc,
                                                         result_desc,
                                                         filter_strides,
                                                         window_dilation_strides_adjusted,
                                                         padding_below,
                                                         padding_above);

    return {weights_reorder, convolution};
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
......
......@@ -18,6 +18,7 @@
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <mkldnn.hpp>
......@@ -89,6 +90,26 @@ namespace ngraph
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops = mkldnn::post_ops());
mkldnn::memory::format query_convolution_forward_weight_format(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc_any,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& filter_strides,
const ngraph::Strides& window_dilation_strides_adjusted,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
std::pair<size_t, size_t> build_group_convolution_forward(
const mkldnn::memory::desc& input_reorder_desc,
const mkldnn::memory::desc& input_conv_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_reorder_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& filter_strides,
const ngraph::Strides& window_dilation_strides_adjusted,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
size_t
build_convolution_backward_weights(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
......
......@@ -163,7 +163,12 @@ bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op)
// Returns the native MKLDNN memory format for a tensor layout by forwarding the
// layout's shape to the Shape overload of CreateNativeDataFormat.
//
// The stale `switch (layout.get_shape().size())` line left in front of the
// return has been dropped: with it, the return statement became the body of a
// label-less switch, was never executed, and the function fell off its end.
mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
    const ngraph::runtime::cpu::LayoutDescriptor& layout)
{
    return CreateNativeDataFormat(layout.get_shape());
}
mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(const Shape& shape)
{
switch (shape.size())
{
case 1: return mkldnn::memory::format::x;
case 2: return mkldnn::memory::format::nc;
......
......@@ -35,6 +35,7 @@ namespace ngraph
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
mkldnn::memory::format CreateNativeDataFormat(const Shape& shape);
const std::string& get_mkldnn_data_type_string(const ngraph::element::Type& type);
mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <numeric>
#include "group_conv.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
// Constructs a GroupConvolution over `data_batch` and `filters`.
//
// The stride, dilation and padding arguments are stored as given; `groups` is
// the number of groups and `output_shape` is used verbatim as the result shape
// (it is not inferred here).
//
// Throws ngraph_error when `groups` is zero or when the element types of the
// data batch and the filters differ.
op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
                                       const shared_ptr<Node>& filters,
                                       const Strides& window_movement_strides,
                                       const Strides& window_dilation_strides,
                                       const CoordinateDiff& padding_below,
                                       const CoordinateDiff& padding_above,
                                       const Strides& data_dilation_strides,
                                       size_t groups,
                                       const Shape& output_shape)
    : RequiresTensorViewArgs("GroupConvolution", {data_batch, filters})
    , m_window_movement_strides(window_movement_strides)
    , m_window_dilation_strides(window_dilation_strides)
    , m_padding_below(padding_below)
    , m_padding_above(padding_above)
    , m_data_dilation_strides(data_dilation_strides)
    , m_groups(groups)
{
    //
    // get_weights_dimensions() divides channel counts by the group count, so a
    // zero group count must be rejected up front instead of dividing by zero later.
    //
    if (groups == 0)
    {
        throw ngraph_error("GroupConvolution requires a non-zero number of groups");
    }

    const auto& data_batch_et = data_batch->get_element_type();
    const auto& filters_et = filters->get_element_type();

    //
    // Make sure data batch and filter element types match.
    //
    if (data_batch_et != filters_et)
    {
        throw ngraph_error("Convolution data batch and filter element types do not match");
    }

    set_value_type_checked(data_batch_et, output_shape);
}
// Returns the filters shape rewritten for grouped use: the group count is
// prepended as a new leading dimension, axis 0 of the filters is divided by the
// group count, and axis 1 is replaced by axis 1 of the data batch divided by the
// group count. Divisions are integer divisions.
Shape op::GroupConvolution::get_weights_dimensions() const
{
    const size_t out_channel_axis = 0;
    const size_t in_channel_axis = 1;
    const size_t group_count = get_groups();

    // Start from a copy of the filters (input 1) shape.
    Shape grouped_shape{get_inputs().at(1).get_shape()};

    // Per-group output channels.
    size_t& out_channels = grouped_shape.at(out_channel_axis);
    out_channels = out_channels / group_count;

    // Per-group input channels, taken from the data batch (input 0).
    const size_t data_channels = get_inputs().at(0).get_shape().at(in_channel_axis);
    grouped_shape.at(in_channel_axis) = data_channels / group_count;

    // Prepend the number of groups as the leading dimension.
    grouped_shape.insert(grouped_shape.begin(), group_count);
    return grouped_shape;
}
// Creates a new GroupConvolution that takes `new_args` as its data batch and
// filters while reusing this node's strides, dilations, padding, group count
// and output shape. Throws ngraph_error unless exactly two arguments are given.
shared_ptr<Node> op::GroupConvolution::copy_with_new_args(const NodeVector& new_args) const
{
    const bool has_data_and_filters = (new_args.size() == 2);
    if (!has_data_and_filters)
    {
        throw ngraph_error("Incorrect number of new arguments");
    }

    const auto& new_data_batch = new_args.at(0);
    const auto& new_filters = new_args.at(1);

    return make_shared<op::GroupConvolution>(new_data_batch,
                                             new_filters,
                                             get_window_movement_strides(),
                                             get_window_dilation_strides(),
                                             get_padding_below(),
                                             get_padding_above(),
                                             get_data_dilation_strides(),
                                             get_groups(),
                                             this->get_shape());
}
// Autodiff hook for GroupConvolution. Backpropagation is not implemented:
// every call throws ngraph_error("NYI") and neither argument is used.
void op::GroupConvolution::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas)
{
throw ngraph_error("NYI");
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/util/requires_tensor_view_args.hpp"
namespace ngraph
{
namespace op
{
/// \brief Group Convolution
///
/// Takes two arguments, a data batch (argument 0) and filters (argument 1),
/// and stores the convolution attributes together with a group count.
class GroupConvolution : public util::RequiresTensorViewArgs
{
public:
/// \brief Constructs a group convolution.
///
/// \param data_batch Node producing the data batch (argument 0).
/// \param filters Node producing the filters (argument 1).
/// \param window_movement_strides Stored as the window movement strides.
/// \param window_dilation_strides Stored as the window dilation strides.
/// \param padding_below Stored as the padding-below values.
/// \param padding_above Stored as the padding-above values.
/// \param data_dilation_strides Stored as the data dilation strides.
/// \param groups Number of groups.
/// \param output_shape Shape of the result, supplied by the caller.
GroupConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
size_t groups,
const Shape& output_shape);
/// \return The filters shape with the group count prepended as a leading dimension.
Shape get_weights_dimensions() const;
/// \return The window movement strides.
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
/// \return The window dilation strides.
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
/// \return The padding-below values.
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
/// \return The padding-above values.
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
/// \return The data dilation strides.
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
/// \return The filters node (argument 1).
std::shared_ptr<Node> get_filters() { return get_argument(1); }
/// \return The data batch node (argument 0).
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
/// \return The number of groups.
size_t get_groups() const { return m_groups; }
/// \brief Clones this op onto a new pair of arguments.
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// \brief Autodiff hook for this op.
void generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas) override;
protected:
// Attributes captured at construction time.
Strides m_window_movement_strides;
Strides m_window_dilation_strides;
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
// Number of groups; defaults to 1 until set by the constructor.
size_t m_groups = 1;
};
}
}
......@@ -37,6 +37,7 @@
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
......@@ -121,6 +122,33 @@ namespace ngraph
}
}
// Marks a GroupConvolution node as an MKLDNN op when all of the following hold:
// every data dilation stride is 1, both inputs have rank 4, and the element
// type of input 0 is f32. Otherwise the node is left unannotated.
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::GroupConvolution)
{
    auto group_conv = static_cast<op::GroupConvolution*>(node);

    const auto data_shape = node->get_input_shape(0);
    const auto filters_shape = node->get_input_shape(1);

    // Any data dilation stride other than 1 rules the MKLDNN kernel out.
    for (size_t stride : group_conv->get_data_dilation_strides())
    {
        if (stride != 1)
        {
            return;
        }
    }

    const bool both_rank_4 = (data_shape.size() == 4) && (filters_shape.size() == 4);
    if (!both_rank_4)
    {
        return;
    }

    if (!(node->get_input_element_type(0) == element::f32))
    {
        return;
    }

    auto annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    annotations->set_mkldnn_op(true);
    group_conv->set_op_annotations(annotations);
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionRelu)
{
......@@ -566,6 +594,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::BatchNormBackprop>},
{TI(ngraph::op::Convolution),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
{TI(ngraph::op::GroupConvolution),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::GroupConvolution>},
{TI(ngraph::op::ConvolutionRelu),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionRelu>},
{TI(ngraph::op::ConvolutionBiasRelu),
......
......@@ -43,6 +43,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
......@@ -239,7 +240,7 @@ namespace ngraph
{
namespace pass
{
template <typename T, bool use_bias>
template <typename T, bool use_bias, bool default_weights_format>
void ConvolutionLayout(std::shared_ptr<ngraph::Node> node,
vector<memory::format>& prim_input_formats,
vector<memory::format>& prim_output_formats)
......@@ -248,6 +249,12 @@ namespace ngraph
auto arg0_shape = node->get_input_shape(0);
auto arg1_shape = node->get_input_shape(1);
if (default_weights_format)
{
arg1_shape = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(node)
->get_weights_dimensions();
}
auto result_shape = node->get_output_shape(0);
auto filter_strides = convolution->get_window_movement_strides();
auto padding_below = convolution->get_padding_below();
......@@ -282,37 +289,68 @@ namespace ngraph
auto arg2_shape = node->get_input_shape(2);
memory::dims mkldnn_arg2_shape(arg2_shape.begin(), arg2_shape.end());
const memory::desc bias_desc(mkldnn_arg2_shape, et, memory::format::any);
fwd_desc.reset(new convolution_forward::desc(prop_kind::forward,
algorithm::convolution_direct,
input_data_desc,
weights_desc,
bias_desc, // with bias
result_desc,
mkldnn_filter_strides,
mkldnn_dilated_strides,
mkldnn_padding_below,
mkldnn_padding_above,
padding_kind::zero));
try
{
fwd_desc.reset(
new convolution_forward::desc(prop_kind::forward,
algorithm::convolution_direct,
input_data_desc,
weights_desc,
bias_desc, // with bias
result_desc,
mkldnn_filter_strides,
mkldnn_dilated_strides,
mkldnn_padding_below,
mkldnn_padding_above,
padding_kind::zero));
}
catch (const mkldnn::error& e)
{
throw ngraph_error(
"setting layouts on Convolution failed with MKLDNN error: " +
e.message);
}
}
else
{
fwd_desc.reset(new convolution_forward::desc(prop_kind::forward,
algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn_filter_strides,
mkldnn_dilated_strides,
mkldnn_padding_below,
mkldnn_padding_above,
padding_kind::zero));
try
{
fwd_desc.reset(
new convolution_forward::desc(prop_kind::forward,
algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn_filter_strides,
mkldnn_dilated_strides,
mkldnn_padding_below,
mkldnn_padding_above,
padding_kind::zero));
}
catch (const mkldnn::error& e)
{
throw ngraph_error(
"setting layouts on Convolution failed with MKLDNN error: " +
e.message);
}
}
convolution_forward::primitive_desc prim_desc(*fwd_desc, cpu_engine);
prim_input_formats.push_back(static_cast<memory::format>(
prim_desc.src_primitive_desc().desc().data.format));
prim_input_formats.push_back(static_cast<memory::format>(
prim_desc.weights_primitive_desc().desc().data.format));
if (default_weights_format)
{
//note, we need the original shape (4D) while arg_shape1 is redefined
prim_input_formats.push_back(
runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
node->get_input_shape(1)));
}
else
{
prim_input_formats.push_back(static_cast<memory::format>(
prim_desc.weights_primitive_desc().desc().data.format));
}
if (use_bias)
{
prim_input_formats.push_back(static_cast<memory::format>(
......@@ -329,7 +367,27 @@ namespace ngraph
{
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
ConvolutionLayout<ngraph::op::Convolution, false>(
ConvolutionLayout<ngraph::op::Convolution, false, false>(
node, prim_input_formats, prim_output_formats);
node =
insert_input_conversions(external_function, node, prim_input_formats);
set_output_layouts(node, prim_output_formats);
}
else
{
set_default_layouts(external_function, node);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::GroupConvolution)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node.get()))
{
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
ConvolutionLayout<ngraph::op::GroupConvolution, false, true>(
node, prim_input_formats, prim_output_formats);
node =
......@@ -349,7 +407,7 @@ namespace ngraph
{
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
ConvolutionLayout<ngraph::op::ConvolutionBias, true>(
ConvolutionLayout<ngraph::op::ConvolutionBias, true, false>(
node, prim_input_formats, prim_output_formats);
node =
insert_input_conversions(external_function, node, prim_input_formats);
......@@ -368,7 +426,7 @@ namespace ngraph
{
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
ConvolutionLayout<ngraph::op::ConvolutionRelu, false>(
ConvolutionLayout<ngraph::op::ConvolutionRelu, false, false>(
node, prim_input_formats, prim_output_formats);
node =
insert_input_conversions(external_function, node, prim_input_formats);
......@@ -387,7 +445,7 @@ namespace ngraph
{
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
ConvolutionLayout<ngraph::op::ConvolutionBiasRelu, true>(
ConvolutionLayout<ngraph::op::ConvolutionBiasRelu, true, false>(
node, prim_input_formats, prim_output_formats);
node =
insert_input_conversions(external_function, node, prim_input_formats);
......@@ -1397,6 +1455,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>},
{TI(ngraph::op::GroupConvolution),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::GroupConvolution>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionBackpropData>},
{TI(ngraph::op::ConvolutionBackpropFilters),
......
......@@ -29,10 +29,10 @@
#include "ngraph/op/dot.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/runtime/cpu/op/batch_dot.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/cpu/op/batch_dot.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
using namespace ngraph;
......@@ -204,6 +204,105 @@ bool runtime::cpu::pass::CPURnnMatFusion::run_on_function(std::shared_ptr<Functi
#define TI(x) std::type_index(typeid(x))
std::shared_ptr<Node> set_or_check_if_same(std::shared_ptr<Node> oldn, std::shared_ptr<Node> newn)
{
if (!oldn)
{
return newn;
}
else
{
if (oldn != newn)
{
NGRAPH_DEBUG << " different data nodes";
return nullptr;
}
return oldn;
}
}
// Returns true only when `conv` has unit window dilation, unit data dilation
// and zero padding below and above. The comparison uses two-element constants,
// so it can only succeed for convolutions with two spatial dimensions.
//
// All four conditions must hold at once. Joining them with `||` would accept a
// convolution as soon as any single attribute is trivial (for example zero
// padding combined with a non-unit dilation), and the fusion would then produce
// a GroupConvolution from convolutions it does not actually support.
static bool is_trivial_convolution(std::shared_ptr<op::Convolution> conv)
{
    const Strides stride_1{1, 1};
    const CoordinateDiff pad_0{0, 0};

    return conv->get_window_dilation_strides() == stride_1 &&
           conv->get_data_dilation_strides() == stride_1 &&
           conv->get_padding_above() == pad_0 && conv->get_padding_below() == pad_0;
}
std::shared_ptr<Node> fuse_group_convolution(const std::shared_ptr<Node>& n)
{
Shape win_size_1{1, 1, 1, 1};
auto data_label = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 4, 9});
auto weights_label = std::make_shared<pattern::op::Label>(element::f32, Shape{4, 2, 3});
auto slice_data = std::make_shared<op::Slice>(
data_label, Coordinate{0, 0, 0}, Coordinate{1, 2, 9}, Strides{1, 1, 1});
auto slice_weights = std::make_shared<op::Slice>(
weights_label, Coordinate{0, 0, 0}, Coordinate{2, 2, 3}, Strides{1, 1, 1});
auto conv = std::make_shared<op::Convolution>(slice_data, slice_weights);
auto matcher = std::make_shared<pattern::Matcher>(conv, nullptr);
NGRAPH_DEBUG << "In simplify_concat (group convolution) for " << n->get_name();
std::shared_ptr<Node> data;
std::shared_ptr<Node> weights;
auto concat = std::dynamic_pointer_cast<op::Concat>(n);
std::shared_ptr<op::Convolution> sconv;
const size_t CHANNEL = 1;
if (concat->get_concatenation_axis() != CHANNEL)
{
NGRAPH_DEBUG << "concatenating on an axis different from channel";
return {nullptr};
}
for (auto arg : n->get_arguments())
{
if (!matcher->match(arg))
{
NGRAPH_DEBUG << arg->get_name() << " doesn't match";
return {nullptr};
}
sconv = std::dynamic_pointer_cast<op::Convolution>(arg);
if (arg->get_input_shape(0).size() != 4)
{
NGRAPH_DEBUG << "convolution data's rank isn't equal to 4";
return {nullptr};
}
if (!is_trivial_convolution(std::dynamic_pointer_cast<op::Convolution>(arg)))
{
NGRAPH_DEBUG << arg->get_name() << " isn't trivial convolution";
return {nullptr};
}
auto pattern_map = matcher->get_pattern_map();
data = set_or_check_if_same(data, pattern_map[data_label]);
weights = set_or_check_if_same(weights, pattern_map[weights_label]);
if (!data || !weights)
{
NGRAPH_DEBUG << "data or weights nodes are different among slices";
return {nullptr};
}
}
auto new_conv = std::make_shared<op::GroupConvolution>(data,
weights,
sconv->get_window_movement_strides(),
sconv->get_window_dilation_strides(),
sconv->get_padding_below(),
sconv->get_padding_above(),
sconv->get_data_dilation_strides(),
n->get_arguments().size(),
n->get_shape());
return new_conv;
}
std::shared_ptr<Node> fuse_batch_dot(const std::shared_ptr<Node>& n)
{
const int num_op_branches = 2;
......@@ -279,7 +378,7 @@ std::shared_ptr<Node> fuse_batch_dot(const std::shared_ptr<Node>& n)
return {nullptr};
}
bool runtime::cpu::pass::CPUBatchDotFusion::run_on_function(std::shared_ptr<Function> func)
bool runtime::cpu::pass::CPUBatchFusion::run_on_function(std::shared_ptr<Function> func)
{
bool modified = false;
......@@ -294,6 +393,11 @@ bool runtime::cpu::pass::CPUBatchDotFusion::run_on_function(std::shared_ptr<Func
func->replace_node(n, fused_node);
modified = true;
}
else if (auto fused_conv = fuse_group_convolution(n))
{
func->replace_node(n, fused_conv);
modified = true;
}
}
}
return modified;
......
......@@ -31,7 +31,7 @@ namespace ngraph
public:
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
class CPUBatchDotFusion : public ngraph::pass::FunctionPass
class CPUBatchFusion : public ngraph::pass::FunctionPass
{
public:
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
......
......@@ -27,6 +27,7 @@
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/negative.hpp"
......@@ -47,6 +48,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
......@@ -69,6 +71,8 @@
#include "util/random.hpp"
#include "util/test_tools.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
using namespace ngraph;
using namespace std;
......@@ -1403,6 +1407,124 @@ TEST(cpu_fusion, batch_norm_folding)
EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
}
// Builds two unit-stride, unpadded convolutions over complementary channel
// slices of one data tensor and complementary slices of one filter tensor,
// concatenates them on axis 1, runs CPUBatchFusion, and expects the function
// result to be fed by a single GroupConvolution.
TEST(cpu_fusion, group_convolution_fusion)
{
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{1, 32, 2, 2});
    auto filters = std::make_shared<op::Parameter>(element::f32, Shape{2, 16, 1, 1});

    // Channel halves of the data.
    auto data_lo =
        std::make_shared<op::Slice>(data, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 2, 2});
    auto data_hi =
        std::make_shared<op::Slice>(data, Coordinate{0, 16, 0, 0}, Coordinate{1, 32, 2, 2});

    // One filter per half.
    auto filters_lo =
        std::make_shared<op::Slice>(filters, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 1, 1});
    auto filters_hi =
        std::make_shared<op::Slice>(filters, Coordinate{1, 0, 0, 0}, Coordinate{2, 16, 1, 1});

    // Convolution with unit strides/dilations and no padding.
    auto plain_conv = [](const std::shared_ptr<Node>& input, const std::shared_ptr<Node>& kernel) {
        return std::make_shared<op::Convolution>(input,
                                                 kernel,
                                                 Strides{1, 1},
                                                 Strides{1, 1},
                                                 CoordinateDiff{0, 0},
                                                 CoordinateDiff{0, 0},
                                                 Strides{1, 1});
    };
    auto conv_lo = plain_conv(data_lo, filters_lo);
    auto conv_hi = plain_conv(data_hi, filters_hi);

    auto joined = std::make_shared<op::Concat>(NodeVector{conv_lo, conv_hi}, 1);
    auto func = std::make_shared<Function>(NodeVector{joined}, op::ParameterVector{data, filters});

    pass::Manager passes;
    passes.register_pass<pass::VisualizeTree>("before_group.pdf");
    passes.register_pass<runtime::cpu::pass::CPUBatchFusion>();
    passes.register_pass<pass::VisualizeTree>("after_group.pdf");
    passes.run_passes(func);

    // The node feeding the (only) result must now be a GroupConvolution.
    auto result_input = func->get_results().at(0)->get_argument(0);
    auto fused = std::dynamic_pointer_cast<op::GroupConvolution>(result_input);
    ASSERT_TRUE(fused);
}
// Runs one GroupConvolution (2 groups) and two ordinary convolutions in the same
// function on the CPU backend. The ordinary convolutions are fed the two halves
// of the group convolution's data and weights buffers and write into the two
// halves of one result buffer, which is then compared for equality with the
// group convolution's result buffer.
TEST(cpu_fusion, group_convolution)
{
auto backend = runtime::Backend::create("CPU");
test::Uniform<float> rng(2.0f, 10.0f);
const size_t GROUPS = 2;
// Inputs and output of the group convolution.
Shape shape_a{1, 32, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{2, 16, 1, 1};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 2, 2, 2};
auto group_conv = make_shared<op::GroupConvolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
GROUPS,
shape_r);
// Reference: one ordinary convolution per group, each on half of the channels.
Shape shape_c{1, 16, 2, 2};
auto C = make_shared<op::Parameter>(element::f32, shape_c);
Shape shape_d{1, 16, 1, 1};
auto D = make_shared<op::Parameter>(element::f32, shape_d);
auto conv_lower = make_shared<op::Convolution>(C,
D,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto E = make_shared<op::Parameter>(element::f32, shape_c);
auto F = make_shared<op::Parameter>(element::f32, shape_d);
auto conv_upper = make_shared<op::Convolution>(E,
F,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto f = make_shared<Function>(NodeVector{group_conv, conv_lower, conv_upper},
op::ParameterVector{A, B, C, D, E, F});
// Random data and weights for the group convolution.
auto a_ = rng.initialize(backend->create_tensor(element::f32, shape_a));
auto b_ = rng.initialize(backend->create_tensor(element::f32, shape_b));
// rv backs the group convolution's result tensor.
vector<float> rv(shape_size(shape_r), 0);
auto group_result = std::dynamic_pointer_cast<ngraph::runtime::cpu::CPUTensorView>(
backend->create_tensor(element::f32, shape_r, rv.data()));
// Host copies of the random inputs; the reference tensors below alias their halves.
auto av = read_vector<float>(a_);
auto bv = read_vector<float>(b_);
auto c_ = backend->create_tensor(element::f32, shape_c, av.data()); // lower data (C)
auto d_ = backend->create_tensor(element::f32, shape_d, bv.data()); // lower weights (D)
auto e_ =
backend->create_tensor(element::f32, shape_c, av.data() + av.size() / 2); // upper data (E)
auto f_ =
backend->create_tensor(element::f32, shape_d, bv.data() + bv.size() / 2); // upper weights (F)
Shape shape_ur{1, 1, 2, 2};
// Allocate one contiguous buffer holding both the lower and the upper result halves.
vector<float> erv(shape_size(shape_r), 0);
auto lower_result = std::dynamic_pointer_cast<ngraph::runtime::cpu::CPUTensorView>(
backend->create_tensor(element::f32, shape_ur, erv.data()));
auto upper_result = std::dynamic_pointer_cast<ngraph::runtime::cpu::CPUTensorView>(
backend->create_tensor(element::f32, shape_ur, erv.data() + erv.size() / 2));
backend->call(f, {group_result, lower_result, upper_result}, {a_, b_, c_, d_, e_, f_});
// The group convolution must match the two reference halves laid end to end.
ASSERT_EQ(rv, erv);
}
TEST(cpu_fusion, rnn_fprop_1_lstm_cell)
{
auto src_layer = make_shared<op::Parameter>(element::f32, Shape{10, 100});
......@@ -2040,7 +2162,7 @@ TEST(cpu_fusion, sigmoid_multiply_fusion_backward)
TEST(cpu_fusion, fuse_batch_dot)
{
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUBatchDotFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchFusion>();
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/batch_dot_3.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
......@@ -2053,7 +2175,7 @@ TEST(cpu_fusion, fuse_batch_dot)
TEST(cpu_fusion, fuse_batch_dot_forward)
{
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUBatchDotFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchFusion>();
const std::string file_name("mxnet/batch_dot_3.json");
auto cpu_f = make_function(file_name);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment