Commit 1a73f10c authored by Nishant Patel's avatar Nishant Patel Committed by Scott Cyphers

Support concat with mkldnn and add a test case (#825)

* Support Concat with mkldnn (two inputs)

* Support concat with mkldnn (multiple inputs)

* Address feedback

* Remove unused variable

* Allow rank-two tensors to use mkldnn for concat & add a test case for 2D inputs

* Add mkldnn_any layout to concat

* Make API changes to be consistent with master
parent 85ba0160
...@@ -851,25 +851,70 @@ namespace ngraph ...@@ -851,25 +851,70 @@ namespace ngraph
} }
} }
#else #else
auto axis =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
std::vector<std::string> arg_names; if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
std::vector<Shape> arg_shapes;
for (auto arg : args)
{ {
arg_names.push_back(arg.get_name()); std::vector<mkldnn::memory::format> inputs_format;
arg_shapes.push_back(arg.get_shape()); std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++)
{
inputs_format.push_back(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, i));
}
auto result_format =
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0);
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
for (size_t i = 0; i < args.size(); i++)
{
inputs_data_desc.push_back(
mkldnn_emitter->build_memory_descriptor(args[i], inputs_format[i]));
}
auto result_desc =
mkldnn_emitter->build_memory_descriptor(out[0], result_format);
size_t concat_index = 0;
size_t concat_dim =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
size_t i;
for (i = 0; i < args.size(); i++)
{
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
<< ", " << args[i].get_name() << ");\n";
}
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(concat_index) << ");\n";
} }
else
{
auto axis =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
kernel::emit_concat(writer, std::vector<std::string> arg_names;
args[0].get_element_type().c_type_string(), std::vector<Shape> arg_shapes;
arg_names,
out[0].get_name(), for (auto arg : args)
arg_shapes, {
result_shape, arg_names.push_back(arg.get_name());
axis); arg_shapes.push_back(arg.get_shape());
}
kernel::emit_concat(writer,
args[0].get_element_type().c_type_string(),
arg_names,
out[0].get_name(),
arg_shapes,
result_shape,
axis);
}
#endif #endif
} }
......
...@@ -678,3 +678,41 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh ...@@ -678,3 +678,41 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh
dweights_index}; dweights_index};
return batchnorm_index; return batchnorm_index;
} }
// Builds an MKLDNN concat primitive and registers it with this emitter.
//
// inputs_data_desc: memory descriptor of each tensor to concatenate, in order.
// result_desc:      memory descriptor of the concatenated output.
// concat_dim:       axis along which the inputs are joined.
//
// Returns the index of the inserted concat primitive. The dependency list
// stored for that index holds the memory-primitive index of every input (in
// input order) followed by the memory-primitive index of the result.
size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
                                   const mkldnn::memory::desc& result_desc,
                                   const size_t concat_dim)
{
    // Memory primitive descriptors the concat primitive descriptor is built from.
    std::vector<mkldnn::memory::primitive_desc> input_pds;
    for (const mkldnn::memory::desc& input_desc : inputs_data_desc)
    {
        input_pds.push_back(mkldnn::memory::primitive_desc(
            input_desc, runtime::cpu::mkldnn_utils::global_cpu_engine));
    }

    // One memory primitive per input; its index doubles as a dependency entry.
    std::vector<size_t> dep_indices;
    std::vector<mkldnn::memory::primitive::at> input_primitives;
    for (const mkldnn::memory::desc& input_desc : inputs_data_desc)
    {
        const size_t input_index = build_memory_primitive(input_desc);
        dep_indices.push_back(input_index);
        input_primitives.push_back(*m_mkldnn_primitives[input_index]);
    }

    const size_t result_index = build_memory_primitive(result_desc);

    // Concat primitive descriptor, then the primitive itself.
    mkldnn::concat::primitive_desc concat_pd =
        mkldnn::concat::primitive_desc(result_desc, static_cast<int>(concat_dim), input_pds);
    const size_t concat_index = insert_primitive(
        new mkldnn::concat(concat_pd, input_primitives, *m_mkldnn_primitives[result_index]));

    // The result always comes last in the dependency list.
    dep_indices.push_back(result_index);
    m_primitive_deps[concat_index] = dep_indices;
    return concat_index;
}
...@@ -184,6 +184,10 @@ namespace ngraph ...@@ -184,6 +184,10 @@ namespace ngraph
const mkldnn::memory::desc& dweights_desc, const mkldnn::memory::desc& dweights_desc,
const double eps); const double eps);
// Builds an MKLDNN concat primitive that joins the tensors described by
// inputs_data_desc along axis concat_dim into a tensor described by
// result_desc. Returns the index of the new concat primitive; its registered
// dependencies are the input memory primitives in order, then the result.
size_t build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
const mkldnn::memory::desc& result_desc,
const size_t concat_dim);
private: private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives; std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::stream> m_mkldnn_streams; std::vector<mkldnn::stream> m_mkldnn_streams;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "ngraph/op/add.hpp" #include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp" #include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp" #include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/max_pool.hpp" #include "ngraph/op/max_pool.hpp"
#include "ngraph/op/relu.hpp" #include "ngraph/op/relu.hpp"
...@@ -73,6 +74,22 @@ namespace ngraph ...@@ -73,6 +74,22 @@ namespace ngraph
} }
} }
// Annotates a Concat node as an MKLDNN op when its first input is f32 and has
// rank 2 or rank 4. Only input 0 is inspected; other nodes are left untouched.
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Concat)
{
    // Element type is checked first so the shape is only queried for f32 inputs.
    if (!(node->get_input_element_type(0) == element::f32))
    {
        return;
    }

    const auto input_rank = (node->get_input_shape(0)).size();
    if (input_rank != 4 && input_rank != 2)
    {
        return;
    }

    auto mkldnn_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    mkldnn_annotations->set_mkldnn_op(true);
    static_cast<op::Concat*>(node)->set_op_annotations(mkldnn_annotations);
}
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Convolution) void CPUAssignment::ASSIGN_DECL(ngraph::op::Convolution)
{ {
...@@ -412,6 +429,7 @@ namespace ngraph ...@@ -412,6 +429,7 @@ namespace ngraph
static const runtime::cpu::pass::AssignOpMap s_dispatcher{ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>}, {TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>},
{TI(ngraph::op::Concat), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Concat>},
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>}, {TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>},
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "ngraph/op/add.hpp" #include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp" #include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp" #include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp" #include "ngraph/op/max_pool.hpp"
...@@ -1168,6 +1169,72 @@ namespace ngraph ...@@ -1168,6 +1169,72 @@ namespace ngraph
set_default_layouts(external_function, node); set_default_layouts(external_function, node);
} }
} }
// Selects layouts for a Concat node.
//
// Nodes not marked for MKLDNN get the default layouts. For MKLDNN nodes a
// concat primitive descriptor is created with the output format left as
// `any`, and the format it picks becomes the node's output layout. Every
// input is requested in the layout currently reported for input 0.
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Concat)
{
    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node.get()))
    {
        set_default_layouts(external_function, node);
        return;
    }

    auto concat_op = static_cast<const ngraph::op::Concat*>(node.get());
    auto input0_layout = runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node.get(), 0);
    size_t num_inputs = node->get_input_size();
    size_t concat_dim = concat_op->get_concatenation_axis();

    // Output descriptor with an unconstrained format.
    auto result_shape = node->get_output_shape(0);
    memory::data_type et =
        runtime::cpu::mkldnn_utils::get_mkldnn_data_type(node->get_input_element_type(0));
    memory::dims mkldnn_result_shape(result_shape.begin(), result_shape.end());
    auto result_desc = memory::desc(mkldnn_result_shape, et, memory::format::any);

    // Wrap each input's tensor view to read its shape and element type.
    vector<TensorViewWrapper> in;
    for (const descriptor::Input& input : node->get_inputs())
    {
        const descriptor::Output& producer = input.get_output();
        shared_ptr<descriptor::TensorView> tensor_view = producer.get_tensor_view();
        in.push_back(TensorViewWrapper(tensor_view, "None"));
    }

    // Describe each input with its own current format and derive the
    // memory primitive descriptor the concat descriptor needs.
    std::vector<mkldnn::memory::primitive_desc> inputs_pd;
    for (size_t i = 0; i < num_inputs; i++)
    {
        const auto input_format =
            runtime::cpu::mkldnn_utils::get_input_mkldnn_format(concat_op, i);
        const auto& input_shape = in[i].get_shape();
        const mkldnn::memory::desc input_desc(
            mkldnn::memory::dims(input_shape.begin(), input_shape.end()),
            mkldnn_utils::get_mkldnn_data_type(in[i].get_element_type()),
            input_format);
        inputs_pd.push_back(mkldnn::memory::primitive_desc(
            input_desc, runtime::cpu::mkldnn_utils::global_cpu_engine));
    }

    auto prim_desc =
        concat::primitive_desc(result_desc, static_cast<int>(concat_dim), inputs_pd);

    // NOTE(review): the descriptor above uses each input's own format, while
    // the conversions below request input 0's layout for all inputs — confirm
    // this is intended when inputs arrive in different layouts.
    vector<memory::format> prim_input_formats(num_inputs, input0_layout);
    vector<memory::format> prim_output_formats;
    prim_output_formats.push_back(
        static_cast<memory::format>(prim_desc.dst_primitive_desc().desc().data.format));

    node = insert_input_conversions(external_function, node, prim_input_formats);
    set_output_layouts(node, prim_output_formats);
}
} }
} }
} }
...@@ -1177,6 +1244,7 @@ namespace ngraph ...@@ -1177,6 +1244,7 @@ namespace ngraph
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Add>}, {TI(ngraph::op::Add), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Add>},
{TI(ngraph::op::Concat), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Concat>},
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPool>}, {TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
......
...@@ -417,6 +417,56 @@ TEST(${BACKEND_NAME}, concat_vector) ...@@ -417,6 +417,56 @@ TEST(${BACKEND_NAME}, concat_vector)
EXPECT_EQ((vector<float>{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 18, 19}), read_vector<float>(result)); EXPECT_EQ((vector<float>{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 18, 19}), read_vector<float>(result));
} }
// Concatenates three single-element rank-4 f32 tensors along axis 0 and
// expects the 3x1x1x1 result to hold the inputs in order.
TEST(${BACKEND_NAME}, concat_4d_tensor)
{
    Shape shape{1, 1, 1, 1};
    Shape shape_r{3, 1, 1, 1};

    // Graph: Concat(A, B, C) on axis 0.
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    auto f = make_shared<Function>(make_shared<op::Concat>(NodeVector{A, B, C}, 0),
                                   op::ParameterVector{A, B, C});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensors holding 1, 2 and 3 respectively.
    auto first = backend->create_tensor(element::f32, shape);
    copy_data(first, vector<float>{1});
    auto second = backend->create_tensor(element::f32, shape);
    copy_data(second, vector<float>{2});
    auto third = backend->create_tensor(element::f32, shape);
    copy_data(third, vector<float>{3});

    // Output tensor.
    auto result = backend->create_tensor(element::f32, shape_r);

    backend->call(f, {result}, {first, second, third});
    EXPECT_EQ((vector<float>{1, 2, 3}), read_vector<float>(result));
}
// Concatenates three single-element rank-2 f32 tensors along axis 0 and
// expects the 3x1 result to hold the inputs in order.
TEST(${BACKEND_NAME}, concat_2d_tensor)
{
    Shape shape{1, 1};
    Shape shape_r{3, 1};

    // Graph: Concat(A, B, C) on axis 0.
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    auto f = make_shared<Function>(make_shared<op::Concat>(NodeVector{A, B, C}, 0),
                                   op::ParameterVector{A, B, C});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensors holding 1, 2 and 3 respectively.
    auto first = backend->create_tensor(element::f32, shape);
    copy_data(first, vector<float>{1});
    auto second = backend->create_tensor(element::f32, shape);
    copy_data(second, vector<float>{2});
    auto third = backend->create_tensor(element::f32, shape);
    copy_data(third, vector<float>{3});

    // Output tensor.
    auto result = backend->create_tensor(element::f32, shape_r);

    backend->call(f, {result}, {first, second, third});
    EXPECT_EQ((vector<float>{1, 2, 3}), read_vector<float>(result));
}
// from numpy import * // from numpy import *
// a=linspace(1,2*3*4*3*2,2*3*4*3*2) // a=linspace(1,2*3*4*3*2,2*3*4*3*2)
// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2) // b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment