Commit e8b5d11b authored by Amy Zhuang's avatar Amy Zhuang Committed by Robert Kimball

Create mkldnn primitives at first iteration for codegen - part1. (#2806)

* Create mkldnn primitives at first iteration for CODEGEN.

  Ops: add, lstm, and rnn.

* Ops: batchnorm.

* Ops: concat and lrn.

  Remove dead code.

* Skip in-place concat, relu, reshape, and slice when building the node_primitive_string_deps_index map.

* Change NGRAPH_ASSERT to NGRAPH_CHECK.

* Ops: Qconv.

* Ops: Convs.

* Address PR feedback.

* Dynamic scale support for qconvs.

* Update to Amy's recent change.

* GroupConv and clean up dead code.

* Address PR feedback.

* Remove unused variable.

* Fix a bug.

* Fix style error.
parent d77ace68
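
The net effect of this patch on the emitted code: instead of building mkldnn primitives while compiling, each generated function now builds them on its first invocation and only rebinds tensor pointers afterwards. A rough sketch of the shape the emitters below give to the generated code for one mkldnn-backed node (the indices and tensor names here are illustrative, not taken from a real emitted function):

    if (ctx->first_iteration)
    {
        // primitive-build code spliced in from the node's recorded build string;
        // it constructs the mkldnn primitive and caches it in the codegen context
    }
    // dependency indices are baked in as integer literals at compile time
    cg_ctx->set_memory_ptr(0, arg0_tensor); // bind input memory
    cg_ctx->set_memory_ptr(1, arg1_tensor);
    cg_ctx->set_memory_ptr(2, out0_tensor); // bind output memory
    cg_ctx->mkldnn_invoke_primitive(3);     // run the cached primitive

On every later call, ctx->first_iteration is false, so only the pointer rebinding and the invoke run.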
...
@@ -62,7 +62,7 @@ namespace ngraph
                 auto lstm_desc =
                     mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
                 // Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
-                // dst_layer, dst_iter, and rnn_forward.
+                // dst_layer, dst_iter, workspace, and rnn_forward.
                 // It needs a new workspace.
                 auto lstm_index =
                     mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
...
@@ -57,7 +57,7 @@ namespace ngraph
                 auto rnn_desc =
                     mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
                 // Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
-                // dst_layer, dst_iter, and rnn_forward.
+                // dst_layer, dst_iter, workspace, and rnn_forward.
                 // It needs a new workspace.
                 auto rnn_index =
                     mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
...
@@ -155,25 +155,41 @@ namespace ngraph
 {
     namespace cpu
     {
+        static void emit_build_primitives(CPU_ExternalFunction* external_function,
+                                          const ngraph::Node* node,
+                                          CodeWriter& writer,
+                                          size_t& index,
+                                          std::vector<std::size_t>& deps)
+        {
+            writer << "if (ctx->first_iteration)\n";
+            writer.block_begin();
+            // get the string, deps, and index from the map
+            writer << get<0>(external_function->get_primitive_build_tuple(node));
+            writer.block_end();
+
+            deps = get<1>(external_function->get_primitive_build_tuple(node));
+            index = get<2>(external_function->get_primitive_build_tuple(node));
+        }
+
         template <>
         void CPU_Emitter::EMITTER_DECL(ngraph::op::Add)
         {
             writer.block_begin();
             if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
             {
-                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                size_t add_index = external_function->get_primitive_index(node);
-
-                auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                       << ", " << args[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                       << ", " << args[1].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                       << ", " << out[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                       << to_string(add_index) << ");\n";
+                size_t add_index;
+                std::vector<std::size_t> deps;
+                emit_build_primitives(external_function, node, writer, add_index, deps);
+
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                       << args[0].get_name() << ");\n";
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                       << args[1].get_name() << ");\n";
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                       << out[0].get_name() << ");\n";
+
+                writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(add_index) << ");\n";
             }
             else
             {
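
Note that emit_build_primitives looks the node's tuple up three times; the get<1> and get<2> parts are returned to the caller as compile-time values, so only the get<0> text ever reaches the generated source. A hypothetical single-lookup equivalent (not part of this patch, shown only to make the data flow explicit) would be:

    static void emit_build_primitives(CPU_ExternalFunction* external_function,
                                      const ngraph::Node* node,
                                      CodeWriter& writer,
                                      size_t& index,
                                      std::vector<std::size_t>& deps)
    {
        // fetch the (build-string, deps, index) record once
        const auto& tuple = external_function->get_primitive_build_tuple(node);

        // splice the primitive-build code under the first-iteration guard
        writer << "if (ctx->first_iteration)\n";
        writer.block_begin();
        writer << std::get<0>(tuple);
        writer.block_end();

        // hand the compile-time values back to the calling emitter
        deps = std::get<1>(tuple);
        index = std::get<2>(tuple);
    }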
...
@@ -517,56 +533,55 @@ namespace ngraph
                     throw ngraph_error(
                         "Lstm op doesnt have the required number of inputs to emit MKLDNN kernel");
                 }
-                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                auto lstm_index = external_function->get_primitive_index(node);
-                auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
+                size_t lstm_index;
+                std::vector<std::size_t> deps;
+                emit_build_primitives(external_function, node, writer, lstm_index, deps);
+
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
                        << args[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
                        << args[1].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
                        << args[2].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
                        << args[3].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
                        << args[4].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
                        << out[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6]) << ", "
                        << out[1].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[7])
-                       << ", ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
-                writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                       << to_string(lstm_index) << ");\n";
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[7])
+                       << ", cg_ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
+                writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(lstm_index) << ");\n";
             }

             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::Rnn)
             {
-                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                auto rnn_index = external_function->get_primitive_index(node);
-                auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
+                size_t rnn_index;
+                std::vector<std::size_t> deps;
+                emit_build_primitives(external_function, node, writer, rnn_index, deps);
+
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
                        << args[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
                        << args[1].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
                        << args[2].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
                        << args[3].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
                        << args[4].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
                        << out[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6]) << ", "
                        << out[1].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[7])
-                       << ", ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
-                writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, " << to_string(rnn_index)
-                       << ");\n";
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[7])
+                       << ", cg_ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
+                writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(rnn_index) << ");\n";
             }

             template <typename T>
...
@@ -588,52 +603,41 @@ namespace ngraph
                        << args[1].get_name() << ", "
                        << args[1].get_size() * args[1].get_element_type().size() << ");\n";

-                const float ops_scale = 1.f;
-                const float ops_alpha = -0.f; // relu negative slope
-                const float ops_beta = 0.f;
-
-                mkldnn::post_ops ops;
-                if (append_relu)
-                {
-                    ops.append_eltwise(
-                        ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
-                }
-
-                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                auto batchnorm_index = external_function->get_primitive_index(node);
-                auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
+                size_t batchnorm_index;
+                std::vector<std::size_t> deps;
+                emit_build_primitives(external_function, node, writer, batchnorm_index, deps);

                 if (training && args.size() == 3)
                 {
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", bn_weights.data());\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4])
-                           << ", " << out[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(batchnorm_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1])
+                           << ", bn_weights.data());\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
+                           << out[2].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
+                           << ");\n";
                 }
                 else
                 {
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[3].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[4].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", bn_weights.data());\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(batchnorm_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[3].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[4].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3])
+                           << ", bn_weights.data());\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
+                           << ");\n";
                 }
                 writer.block_end();
             }
...
@@ -741,27 +745,27 @@ namespace ngraph
                        << args[1].get_name() << ", "
                        << args[1].get_size() * args[1].get_element_type().size() << ");\n";

-                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                auto batchnorm_index = external_function->get_primitive_index(node);
-                auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
+                size_t batchnorm_index;
+                std::vector<std::size_t> deps;
+                emit_build_primitives(external_function, node, writer, batchnorm_index, deps);

-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0])
                        << ", bn_weights.data());\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
                        << args[2].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
                        << args[3].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
                        << args[4].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
                        << args[5].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
                        << out[0].get_name() << ");\n";
-                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6])
+                writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6])
                        << ", bn_dweights.data());\n";
-                writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                       << to_string(batchnorm_index) << ");\n";
+                writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
+                       << ");\n";

                 writer << "memcpy(" << out[1].get_name() << ", &bn_dweights[0], "
                        << args[0].get_size() * args[0].get_element_type().size() << ");\n";
...
@@ -982,21 +986,21 @@ namespace ngraph
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    size_t concat_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
+                    size_t concat_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, concat_index, deps);

                     size_t i;
                     for (i = 0; i < args.size(); i++)
                     {
-                        writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
-                               << ", " << args[i].get_name() << ");\n";
+                        writer << "cg_ctx->set_memory_ptr(" << to_string(deps[i]) << ", "
+                               << args[i].get_name() << ");\n";
                     }
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(concat_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[i]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(concat_index)
+                           << ");\n";
                 }
                 else
                 {
...
@@ -1150,17 +1154,16 @@ namespace ngraph
                 writer.block_begin();
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto lrn_index = external_function->get_primitive_index(node);
-
-                    auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(lrn_index) << ");\n";
+                    size_t lrn_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, lrn_index, deps);
+
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << out[0].get_name() << ");\n";
+
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(lrn_index) << ");\n";
                 }
                 else
                 {
...
@@ -1984,18 +1987,17 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
             }
...
@@ -2004,18 +2006,17 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2028,18 +2029,17 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2050,26 +2050,20 @@ namespace ngraph
             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolution)
            {
-                auto arg0_shape = args[0].get_shape();
-                auto arg1_shape = args[1].get_shape();
-                auto result_shape = out[0].get_shape();
-
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
                     // invoke group convolution
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    size_t conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2080,29 +2074,22 @@ namespace ngraph
             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolutionBias)
             {
-                auto arg0_shape = args[0].get_shape();
-                auto arg1_shape = args[1].get_shape();
-                auto arg2_shape = args[2].get_shape();
-                auto result_shape = out[0].get_shape();
-
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    size_t conv_index = external_function->get_primitive_index(node);
-
-                    // invoke group convolution bias
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    // invoke group convolution
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2121,19 +2108,17 @@ namespace ngraph
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
-
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);
+
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2291,21 +2276,19 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto qconv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
-
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(qconv_index) << ");\n";
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);
+
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2319,26 +2302,24 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto qconv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

                     writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
                     writer.block_begin();
                     writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
                            << args[3].get_size() * args[3].get_element_type().size() << ");\n";
                     writer.block_end();

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(qconv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2352,26 +2333,24 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto qconv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

                     writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
                     writer.block_begin();
                     writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
                            << args[3].get_size() * args[3].get_element_type().size() << ");\n";
                     writer.block_end();

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(qconv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2446,21 +2425,19 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
-
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);
+
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2473,25 +2450,24 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

                     writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
                     writer.block_begin();
                     writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
                            << args[3].get_size() * args[3].get_element_type().size() << ");\n";
                     writer.block_end();

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << args[2].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -2504,23 +2480,24 @@ namespace ngraph
             {
                 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                 {
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto conv_index = external_function->get_primitive_index(node);
-                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    size_t conv_index;
+                    std::vector<std::size_t> deps;
+                    emit_build_primitives(external_function, node, writer, conv_index, deps);

                     writer << "if (" << out[0].get_name() << " != " << args[2].get_name() << ")\n";
                     writer.block_begin();
                     writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", "
                            << args[2].get_size() * args[2].get_element_type().size() << ");\n";
                     writer.block_end();

-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
-                           << ", " << args[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
-                           << ", " << args[1].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
-                           << ", " << out[0].get_name() << ");\n";
-                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
-                           << to_string(conv_index) << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
+                           << args[0].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
+                           << args[1].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
+                           << args[2].get_name() << ");\n";
+                    writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
+                           << out[0].get_name() << ");\n";
+                    writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
                 }
                 else
                 {
...
@@ -155,6 +155,7 @@
 #include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
 #include "ngraph/runtime/cpu/cpu_tracing.hpp"
 #include "ngraph/runtime/cpu/cpu_visualize_tree.hpp"
+#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
 #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
 #include "ngraph/runtime/cpu/op/batch_mat_mul_transpose.hpp"
 #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
...
@@ -473,7 +474,10 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
     // Build mkldnn primitives for codegen.
     pass_manager.register_pass<runtime::cpu::pass::MKLDNNPrimitiveBuildPass>(
-        *m_mkldnn_emitter, m_node_primitive_idx_map);
+        m_desc_filename,
+        *m_mkldnn_emitter,
+        m_node_primitive_idx_map,
+        m_node_primitive_string_deps_index_map);

     unordered_map<Node*, Node*> node_function_map;
     string common_function_string;
...
@@ -510,13 +514,17 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
     writer +=
         R"(
 #include <cmath>
+#include <fstream>
+#include <mkldnn.hpp>
 #include "ngraph/distributed.hpp"
 #include "ngraph/except.hpp"
 #include "ngraph/runtime/aligned_buffer.hpp"
 #include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
+#include "ngraph/runtime/cpu/cpu_executor.hpp"
 #include "ngraph/runtime/cpu/cpu_kernels.hpp"
 #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
 #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
+#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
 #include "ngraph/runtime/reference/all.hpp"
 #include "ngraph/runtime/reference/and.hpp"
 #include "ngraph/runtime/reference/any.hpp"
...
@@ -668,6 +676,14 @@ using namespace ngraph::runtime;
     writer << common_function_string << "\n";

+    // initiate mkldnn_primitives for CPURuntimeContextCG
+    writer << "void inline CPURuntimeContextCG::init_mkldnn_primitives()\n";
+    writer.block_begin();
+    writer << "mkldnn_primitives = std::vector<mkldnn::primitive*>("
+           << to_string(m_mkldnn_emitter->get_mkldnn_primitives_cg().size()) << ");\n";
+    writer.block_end();
+    writer << "\n";
+
     for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
     {
         auto ordered_ops = function_ordered_ops.at(current_function);
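
With this block, the generated translation unit defines CPURuntimeContextCG::init_mkldnn_primitives to size the primitive table up front. Assuming the emitter recorded, say, 42 primitives, the emitted definition would read (the count is illustrative):

    void inline CPURuntimeContextCG::init_mkldnn_primitives()
    {
        mkldnn_primitives = std::vector<mkldnn::primitive*>(42); // one slot per primitive
    }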
...
@@ -722,6 +738,16 @@ using namespace ngraph::runtime;
         writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
         writer << "{\n";
         writer.indent++;

+        writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
+        // deserialize and build mkldnn primitives
+        writer << "if (ctx->first_iteration)\n";
+        writer.block_begin();
+        writer << "// read in memory descriptors and build mkldnn primitives\n";
+        writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
+               << ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
+               << ");\n";
+        writer.block_end();
+
         // Execution tracing support
         if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
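
A detail worth noting: m_desc_filename is emitted quoted in the std::ifstream line but unquoted in the deserialize call, so the same string serves as both the on-disk file name and the name of the generated stream variable. With the default m_desc_filename of "desc_file" (declared in the header change below) and an illustrative descriptor count of 57, each generated function therefore begins roughly like this:

    std::ifstream desc_file ("desc_file", std::ios::binary);
    if (ctx->first_iteration)
    {
        // read in memory descriptors and build mkldnn primitives
        deserialize_memory_descs_and_build_memory_primitives(desc_file, cg_ctx, 57);
    }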
...
@@ -125,6 +125,18 @@ namespace ngraph
                 return it->second;
             }

+            // Return the tuple including the string to create mkldnn primitive, the deps and the index in CODEGEN
+            const std::tuple<std::string, std::vector<size_t>, size_t>&
+                get_primitive_build_tuple(const Node* node) const
+            {
+                auto it = m_node_primitive_string_deps_index_map.find(node);
+                NGRAPH_CHECK(it != m_node_primitive_string_deps_index_map.end(),
+                             "Primitive build tuple not found for node ",
+                             node->description());
+
+                return it->second;
+            }
+
             size_t add_state(ngraph::State* state)
             {
                 m_states.push_back(state);
...
@@ -318,6 +330,11 @@ namespace ngraph
             /// Map each node with mkldnn implementation to its mkldnn primitive index.
             std::unordered_map<const Node*, size_t> m_node_primitive_idx_map;

+            /// Map each node with mkldnn implementation to its mkldnn primitive creating string, deps, and mkldnn primitive index.
+            std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>
+                m_node_primitive_string_deps_index_map;
+            /// Name of the file to store descriptors for mkldnn_primitives
+            const std::string m_desc_filename = "desc_file";
         };
     }
 }
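
These two members are the compile-time half of the scheme: for each mkldnn node, MKLDNNPrimitiveBuildPass records one tuple, whose parts the emitters consume as follows (a schematic reading of the type, with a hypothetical alias added for clarity):

    // get<0>: C++ text that constructs the node's mkldnn primitive; spliced into the
    //         generated function inside its "if (ctx->first_iteration)" block
    // get<1>: indices of the node's memory primitives, baked into the emitted
    //         cg_ctx->set_memory_ptr(...) calls
    // get<2>: index of the compute primitive, baked into the emitted
    //         cg_ctx->mkldnn_invoke_primitive(...) call
    using PrimitiveBuildRecord = std::tuple<std::string, std::vector<size_t>, size_t>;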
...
@@ -80,6 +80,11 @@ std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives()
     return m_mkldnn_primitives;
 }

+const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives_cg() const
+{
+    return m_mkldnn_primitives_cg;
+}
+
 const std::vector<char*>& MKLDNNEmitter::get_mkldnn_workspaces()
 {
     return m_workspace_bufs;
...
@@ -98,6 +103,22 @@ size_t MKLDNNEmitter::insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace)
     return (m_workspaces.size() - 1);
 }

+size_t MKLDNNEmitter::reserve_workspace()
+{
+    m_workspaces_size++;
+    return m_workspaces_size - 1;
+}
+
+void MKLDNNEmitter::reserve_descriptor_space(size_t count)
+{
+    m_mkldnn_descriptors_size += count;
+}
+
+size_t MKLDNNEmitter::get_mkldnn_descriptors_size()
+{
+    return m_mkldnn_descriptors_size;
+}
+
 size_t MKLDNNEmitter::insert_workspace(std::vector<char*>& mkldnn_workspaces,
                                        std::unique_ptr<MKLDNNWorkspace>& workspace)
 {
...
@@ -111,6 +132,11 @@ const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
     return m_primitive_deps.at(index);
 }

+const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps_cg(size_t index) const
+{
+    return m_primitive_deps_cg.at(index);
+}
+
 std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index)
 {
     return m_primitive_deps.at(index);
...@@ -401,206 +427,6 @@ size_t MKLDNNEmitter::build_deconvolutionbias_forward(const mkldnn::memory::desc ...@@ -401,206 +427,6 @@ size_t MKLDNNEmitter::build_deconvolutionbias_forward(const mkldnn::memory::desc
return conv_index; return conv_index;
} }
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
mkldnn::algorithm convolution_algo = mkldnn_utils::get_conv_algo();
size_t conv_index = 0;
try
{
auto conv_prim = new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward_inference,
convolution_algo,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[result_index]);
conv_index = insert_primitive(conv_prim);
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not create mkldnn convolution " + e.message);
}
return conv_index;
}
size_t
MKLDNNEmitter::build_quantized_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
std::vector<float> output_scale;
output_scale.push_back(scale);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
/* Specify the rounding mode */
conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
/* Specify the scales array and corresponding mask */
conv_attr.set_output_scales(0, output_scale);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
return conv_index;
}
size_t
MKLDNNEmitter::build_quantized_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t bias_index = build_memory_primitive(bias_desc);
size_t result_index = build_memory_primitive(result_desc);
std::vector<float> output_scale;
output_scale.push_back(scale);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
/* Specify the rounding mode */
conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
/* Specify the scales array and corresponding mask */
conv_attr.set_output_scales(0, output_scale);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
bias_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops)
{
const size_t input_data_index = build_memory_primitive(input_data_desc);
const size_t weights_index = build_memory_primitive(weights_desc);
const size_t bias_index = build_memory_primitive(bias_desc);
const size_t result_index = build_memory_primitive(result_desc);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
mkldnn::algorithm convolution_algo = mkldnn_utils::get_conv_algo();
size_t conv_index = -1;
try
{
conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward_inference,
convolution_algo,
input_data_desc,
weights_desc,
bias_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not create convolution " + e.message);
}
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_backward_weights_bias( size_t MKLDNNEmitter::build_convolution_backward_weights_bias(
const mkldnn::memory::desc& in_data_desc, const mkldnn::memory::desc& in_data_desc,
const mkldnn::memory::desc& in_delta_desc, const mkldnn::memory::desc& in_delta_desc,
...@@ -1226,35 +1052,6 @@ void MKLDNNEmitter::build_reorder(std::vector<mkldnn::primitive*>& mkldnn_primit ...@@ -1226,35 +1052,6 @@ void MKLDNNEmitter::build_reorder(std::vector<mkldnn::primitive*>& mkldnn_primit
new mkldnn::reorder(*mkldnn_primitives[input_index], *mkldnn_primitives[result_index]); new mkldnn::reorder(*mkldnn_primitives[input_index], *mkldnn_primitives[result_index]);
} }
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
float beta,
float bias,
int nsize)
{
size_t input_index = build_memory_primitive(input_desc);
size_t result_index = build_memory_primitive(result_desc);
auto lrn_desc = mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring,
mkldnn::algorithm::lrn_across_channels,
input_desc,
nsize,
alpha,
beta,
bias);
auto lrn_prim_desc = mkldnn::lrn_forward::primitive_desc(lrn_desc, executor::global_cpu_engine);
size_t primitive_index = insert_primitive(new mkldnn::lrn_forward(
lrn_prim_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(primitive_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
mkldnn::lrn_forward::desc MKLDNNEmitter::get_lrn_forward_desc(const ngraph::Node* node) mkldnn::lrn_forward::desc MKLDNNEmitter::get_lrn_forward_desc(const ngraph::Node* node)
{ {
const ngraph::op::LRN* lrn = static_cast<const ngraph::op::LRN*>(node); const ngraph::op::LRN* lrn = static_cast<const ngraph::op::LRN*>(node);
...@@ -1528,37 +1325,6 @@ void MKLDNNEmitter::build_sigmoid_backward(std::vector<mkldnn::primitive*>& mkld ...@@ -1528,37 +1325,6 @@ void MKLDNNEmitter::build_sigmoid_backward(std::vector<mkldnn::primitive*>& mkld
*mkldnn_primitives[result_index]); *mkldnn_primitives[result_index]);
} }
size_t MKLDNNEmitter::build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& inputs_pd)
{
std::vector<mkldnn::memory::primitive::at> inputs_primitive;
size_t input0_data_index = build_memory_primitive(input0_data_desc);
size_t input1_data_index = build_memory_primitive(input1_data_desc);
size_t result_index = build_memory_primitive(result_desc);
inputs_primitive.push_back(*m_mkldnn_primitives[input0_data_index]);
inputs_primitive.push_back(*m_mkldnn_primitives[input1_data_index]);
// elementwise sum primtive descriptor
mkldnn::sum::primitive_desc sum_pd =
mkldnn::sum::primitive_desc(result_desc, scale_vector, inputs_pd);
// sum primitive
size_t add_index = insert_primitive(
new mkldnn::sum(sum_pd, inputs_primitive, *m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(add_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[add_index] = {input0_data_index, input1_data_index, result_index};
return add_index;
}
mkldnn::sum::primitive_desc MKLDNNEmitter::get_elementwise_add_desc(const ngraph::Node* node)
{
std::vector<float> scale_vector(2, 1);
@@ -1602,66 +1368,6 @@ void MKLDNNEmitter::build_elementwise_add(std::vector<mkldnn::primitive*>& mkldn
new mkldnn::sum(sum_pd, inputs_primitive, *mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const double eps,
bool use_global_stats,
bool bn_training_flag,
const mkldnn::post_ops& pops)
{
size_t input_index = build_memory_primitive(input_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
size_t mean_index = build_memory_primitive(mean_desc);
size_t variance_index = build_memory_primitive(variance_desc);
mkldnn::primitive_attr bn_attr;
bn_attr.set_post_ops(pops);
if (bn_training_flag && !use_global_stats)
{
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_forward(
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
bn_attr,
executor::global_cpu_engine},
mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]),
*m_mkldnn_primitives[mean_index],
*m_mkldnn_primitives[variance_index]));
m_primitive_deps[batchnorm_index] = {
input_index, weights_index, result_index, mean_index, variance_index};
return batchnorm_index;
}
else
{
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_forward(
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift |
mkldnn::batch_normalization_flag::use_global_stats},
bn_attr,
executor::global_cpu_engine},
mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[mean_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[variance_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index])));
m_primitive_deps[batchnorm_index] = {
input_index, mean_index, variance_index, weights_index, result_index};
return batchnorm_index;
}
}
void MKLDNNEmitter::build_batchnorm_forward(
std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
@@ -1720,56 +1426,6 @@ void MKLDNNEmitter::build_batchnorm_forward(
}
}
size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& dinput_desc,
const mkldnn::memory::desc& dweights_desc,
const double eps)
{
size_t weights_index = build_memory_primitive(weights_desc);
size_t input_index = build_memory_primitive(input_desc);
size_t mean_index = build_memory_primitive(mean_desc);
size_t variance_index = build_memory_primitive(variance_desc);
size_t delta_index = build_memory_primitive(delta_desc);
size_t dinput_index = build_memory_primitive(dinput_desc);
size_t dweights_index = build_memory_primitive(dweights_desc);
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_backward(
{{mkldnn::prop_kind::backward,
delta_desc,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
executor::global_cpu_engine,
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
executor::global_cpu_engine}},
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[mean_index],
*m_mkldnn_primitives[variance_index],
*m_mkldnn_primitives[delta_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[dinput_index],
*m_mkldnn_primitives[dweights_index]));
NGRAPH_CHECK(m_primitive_deps.find(batchnorm_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[batchnorm_index] = {weights_index,
input_index,
mean_index,
variance_index,
delta_index,
dinput_index,
dweights_index};
return batchnorm_index;
}
mkldnn::batch_normalization_backward::desc
MKLDNNEmitter::get_batchnorm_backward_desc(const ngraph::Node* node)
{
@@ -1828,72 +1484,6 @@ void MKLDNNEmitter::build_batchnorm_backward(
*mkldnn_primitives[dweights_index]);
}
size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc,
const mkldnn::memory::desc& weights_iter_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& dst_layer_desc,
const mkldnn::memory::desc& dst_iter_desc,
const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm)
{
size_t src_layer_index = build_memory_primitive(src_layer_desc);
size_t src_iter_index = build_memory_primitive(src_iter_desc);
size_t weights_layer_index = build_memory_primitive(weights_layer_desc);
size_t weights_iter_index = build_memory_primitive(weights_iter_desc);
size_t bias_index = build_memory_primitive(bias_desc);
size_t dst_layer_index = build_memory_primitive(dst_layer_desc);
size_t dst_iter_index = build_memory_primitive(dst_iter_desc);
mkldnn::rnn_cell::desc rnn_cell(rnn_algorithm);
mkldnn::rnn_forward::desc rnn_layer_desc(mkldnn::prop_kind::forward_training,
rnn_cell,
rnn_direction,
src_layer_desc,
src_iter_desc,
weights_layer_desc,
weights_iter_desc,
bias_desc,
dst_layer_desc,
dst_iter_desc);
auto rnn_layer_prim_desc =
mkldnn::rnn_forward::primitive_desc(rnn_layer_desc, executor::global_cpu_engine);
auto workspace_index =
build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc());
auto workspace = std::unique_ptr<MKLDNNWorkspace>(
new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
auto workspace_buf_index = insert_workspace(workspace);
size_t rnn_index = insert_primitive(new mkldnn::rnn_forward(
rnn_layer_prim_desc,
mkldnn::primitive::at(*m_mkldnn_primitives[src_layer_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[src_iter_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_layer_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_iter_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[bias_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_layer_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_iter_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[workspace_index])));
NGRAPH_CHECK(m_primitive_deps.find(rnn_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[rnn_index] = {src_layer_index,
src_iter_index,
weights_layer_index,
weights_iter_index,
bias_index,
dst_layer_index,
dst_iter_index,
workspace_index,
workspace_buf_index};
return rnn_index;
}
void MKLDNNEmitter::build_rnn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
std::vector<char*>& mkldnn_workspaces,
const mkldnn::rnn_forward::desc& rnn_desc,
@@ -2282,6 +1872,21 @@ size_t MKLDNNEmitter::reserve_primitive_space(size_t count, bool new_workspace)
return m_mkldnn_primitives.size() - 1;
}
size_t MKLDNNEmitter::reserve_primitive_space_cg(size_t count, bool new_workspace)
{
size_t size = m_mkldnn_primitives_cg.size();
m_mkldnn_primitives_cg.resize(size + count, nullptr);
for (size_t i = 0; i < count - 1; i++)
{
m_primitive_deps_cg[m_mkldnn_primitives_cg.size() - 1].push_back(size + i);
}
if (new_workspace)
{
m_primitive_deps_cg[m_mkldnn_primitives_cg.size() - 1].push_back(0);
}
return m_mkldnn_primitives_cg.size() - 1;
}
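Usage note: reserve_primitive_space_cg mirrors reserve_primitive_space for the codegen path; the last reserved slot belongs to the compute primitive and the preceding slots are its memory dependencies. A minimal sketch of the intended call pattern (the count of 4 is illustrative):

// Illustrative only: an op with two inputs and one output reserves 4 slots:
// input0, input1, result, plus the compute primitive itself.
size_t index = mkldnn_emitter.reserve_primitive_space_cg(4);
// deps lists the indices of the 3 memory slots reserved just before `index`.
std::vector<size_t> deps = mkldnn_emitter.get_primitive_deps_cg(index);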
size_t MKLDNNEmitter::build_quantized_inner_product_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
......
@@ -127,17 +127,24 @@ namespace ngraph
MKLDNNEmitter() {}
~MKLDNNEmitter();
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives_cg() const;
std::vector<mkldnn::primitive*>& get_mkldnn_primitives();
const std::vector<char*>& get_mkldnn_workspaces();
// Reserve the space for each op's primitives; different ops require different numbers of primitives.
// Some ops require a new workspace.
size_t reserve_primitive_space(size_t count, bool new_workspace = false);
size_t reserve_primitive_space_cg(size_t count, bool new_workspace = false);
size_t insert_primitive(mkldnn::primitive* primitive);
size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace);
size_t insert_workspace(std::vector<char*>& mkldnn_workspaces,
std::unique_ptr<MKLDNNWorkspace>& workspace);
const std::vector<size_t>& get_primitive_deps(size_t index) const;
const std::vector<size_t>& get_primitive_deps_cg(size_t index) const;
size_t reserve_workspace();
void reserve_descriptor_space(size_t count);
size_t get_mkldnn_descriptors_size();
std::vector<size_t>& get_primitive_deps(size_t index);
// TODO(jmenon): Get rid of TensorViewWrappers at some point
@@ -156,51 +163,6 @@ namespace ngraph
const mkldnn::memory::desc& desc,
size_t index);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops = mkldnn::post_ops());
/**
* Convolution + bias forward
*/
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_inner_product_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
@@ -216,121 +178,6 @@ namespace ngraph
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
template <typename OpTy>
size_t build_convolution(const ngraph::Node* node)
{
// For dilation, MKLDNN wants to know how many elements to insert between, not
// how far apart to space the elements like nGraph. So we have to subtract 1
// from each pos.
Strides window_dilation_strides_adjusted;
auto* convolution = static_cast<const OpTy*>(node);
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
mkldnn::post_ops ops;
if (std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::ConvolutionAdd>())
{
ops.append_sum(1.f);
}
if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node, 5);
ops.append_sum(sum_scale_val[0]);
}
if (has_relu<OpTy>(node))
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
if (std::is_same<OpTy, ngraph::op::ConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>())
{
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_convolution_forward(data_desc,
weights_desc,
bias_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
ops);
}
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolution>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionRelu>())
{
auto scale_val = extract_scale_value<OpTy>(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale_val[0],
ops);
}
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
int index =
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ? 3 : 4;
auto scale_val = extract_scale_value<OpTy>(node, index);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
bias_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale_val[0],
ops);
}
else
{
return build_convolution_forward(data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
ops);
}
}
void build_deconvolutionbias_forward(
std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::deconvolution_forward::desc& fwd_desc,
@@ -466,31 +313,6 @@ namespace ngraph
}
}
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
mkldnn::memory::format query_convolution_forward_weight_format(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc_any,
@@ -809,13 +631,6 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t reorder_index);
size_t build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
float beta,
float bias,
int nsize);
mkldnn::lrn_forward::desc get_lrn_forward_desc(const ngraph::Node* node);
void build_lrn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
@@ -868,79 +683,12 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t sigmoid_index);
size_t build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& input_pd);
mkldnn::sum::primitive_desc get_elementwise_add_desc(const ngraph::Node* node);
void build_elementwise_add(std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::sum::primitive_desc& sum_pd,
const std::vector<size_t>& deps,
size_t add_index);
template <typename OpTy>
size_t build_batch_norm_primitive(const Node* node,
const bool append_relu,
const bool training)
{
const auto& args = node->get_inputs();
mkldnn::post_ops ops;
if (append_relu)
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
bool use_global_stats;
const mkldnn::memory::desc *mean_desc, *variance_desc;
if (training && args.size() == 3)
{
mean_desc = &mkldnn_utils::get_output_mkldnn_md(node, 1);
variance_desc = &mkldnn_utils::get_output_mkldnn_md(node, 2);
use_global_stats = false;
}
else
{
mean_desc = &mkldnn_utils::get_input_mkldnn_md(node, 3);
variance_desc = &mkldnn_utils::get_input_mkldnn_md(node, 4);
use_global_stats = true;
}
const OpTy* batchnorm = static_cast<const OpTy*>(node);
return build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
*mean_desc,
*variance_desc,
batchnorm->get_eps_value(),
use_global_stats,
training,
ops);
}
size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const double eps,
bool use_global_stats,
bool bn_training_flag,
const mkldnn::post_ops& pops = mkldnn::post_ops());
template <typename OP>
mkldnn::batch_normalization_forward::desc
@@ -979,15 +727,6 @@ namespace ngraph
size_t batchnorm_index,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& dinput_desc,
const mkldnn::memory::desc& dweights_desc,
const double eps);
mkldnn::batch_normalization_backward::desc
get_batchnorm_backward_desc(const ngraph::Node* node);
@@ -999,118 +738,6 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t batchnorm_index);
template <typename OP>
size_t build_rnn(const ngraph::Node* node)
{
const auto& out = node->get_outputs();
const auto& args = node->get_inputs();
auto rnn_node = static_cast<const OP*>(node);
auto src_sequence_length_max =
static_cast<unsigned long>(rnn_node->get_src_sequence_length());
auto direction = static_cast<unsigned long>(rnn_node->get_direction());
auto num_fused_layers =
static_cast<unsigned long>(rnn_node->get_num_fused_layers());
auto feature_size =
static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
auto rnn_cell_n_gates =
static_cast<unsigned long>(rnn_node->get_gates_per_cell());
auto rnn_cell_n_states =
static_cast<unsigned long>(rnn_node->get_num_cell_states());
auto get_mkldnn_rnn_cell_type = [&]() {
switch (rnn_node->get_rnn_type())
{
case rnn_utils::rnntype::vanilla_rnn: return mkldnn::algorithm::vanilla_rnn;
case rnn_utils::rnntype::vanilla_gru: return mkldnn::algorithm::vanilla_gru;
case rnn_utils::rnntype::vanilla_lstm:
return mkldnn::algorithm::vanilla_lstm;
default: throw ngraph_error("unsupported mkldnn rnn algorithm");
}
};
auto get_mkldnn_rnn_direction = [&]() {
switch (direction)
{
case 1: return mkldnn::rnn_direction::unidirectional_left2right;
case 2: return mkldnn::rnn_direction::bidirectional_concat;
default: throw ngraph_error("unsupported mkldnn rnn direction");
}
};
if (out[0].get_shape().size() == 2 &&
(out[0].get_shape()[1] != direction * feature_size))
{
throw ngraph_error(
"input slc{ht} feature size is not equal to output dlc{ht} feature "
"size ");
}
if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
rnn_node->get_num_timesteps() != 1)
{
throw ngraph_error(
"input sic{ht_1|ct_1} feature size is not equal to output "
"dlc{ht_1|ct_1} "
"feature size ");
}
Shape src_layer_tz{
src_sequence_length_max,
batch,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
Shape src_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
Shape wei_layer_tz{
num_fused_layers,
direction,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
rnn_cell_n_gates,
feature_size};
Shape wei_iter_tz{
num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
Shape dst_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
// We create the memory descriptors used by the user
auto src_layer_md = build_memory_descriptor(
src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
auto src_iter_md = build_memory_descriptor(
src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
auto wei_layer_md = build_memory_descriptor(
wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
auto wei_iter_md = build_memory_descriptor(
wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
auto bias_md = build_memory_descriptor(
bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
auto dst_layer_md = build_memory_descriptor(
dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
auto dst_iter_md = build_memory_descriptor(
dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);
return build_rnn_forward(src_layer_md,
src_iter_md,
wei_layer_md,
wei_iter_md,
bias_md,
dst_layer_md,
dst_iter_md,
get_mkldnn_rnn_direction(),
get_mkldnn_rnn_cell_type());
}
size_t build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc,
const mkldnn::memory::desc& weights_iter_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& dst_layer_desc,
const mkldnn::memory::desc& dst_iter_desc,
const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm);
void build_rnn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
std::vector<char*>& mkldnn_workspaces,
const mkldnn::rnn_forward::desc& desc,
@@ -1886,10 +1513,14 @@ namespace ngraph
private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::primitive*> m_mkldnn_primitives_cg;
std::vector<mkldnn::stream> m_mkldnn_streams;
std::unordered_map<size_t, std::vector<size_t>> m_primitive_deps;
std::unordered_map<size_t, std::vector<size_t>> m_primitive_deps_cg;
std::vector<std::unique_ptr<MKLDNNWorkspace>> m_workspaces;
std::vector<char*> m_workspace_bufs;
size_t m_workspaces_size = 0;
size_t m_mkldnn_descriptors_size = 0;
};
}
}
......
@@ -14,8 +14,11 @@
// limitations under the License.
//*****************************************************************************
#include <string>
#include "cpu_mkldnn_primitive_build.hpp" #include "cpu_mkldnn_primitive_build.hpp"
#include "ngraph/code_writer.hpp"
#include "ngraph/op/add.hpp" #include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp" #include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp" #include "ngraph/op/batch_norm.hpp"
@@ -40,6 +43,7 @@
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp" #include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp" #include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp" #include "ngraph/runtime/cpu/cpu_executor.hpp"
@@ -65,70 +69,499 @@ namespace ngraph
{
namespace pass
{
// serialize memory descriptors
static void serialize_memory_descs(std::ofstream& desc_file,
std::vector<mkldnn::memory::desc>& descs,
size_t primitive_index)
{
for (size_t i = 0; i < descs.size(); i++)
{
desc_file << primitive_index;
desc_file.write(reinterpret_cast<char*>(&descs[i]),
sizeof(mkldnn::memory::desc));
primitive_index++;
}
}
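The matching reader is not part of this hunk; presumably the runtime loads these records back into cg_ctx->mkldnn_descriptors before the first iteration. A rough sketch of such a reader, under the assumption that each record is a formatted primitive index immediately followed by the raw bytes of the descriptor (the function name is hypothetical, and a robust implementation would likely want a delimiter or fixed-width index between the text and the raw bytes):

// Hypothetical counterpart to serialize_memory_descs above.
static void deserialize_memory_descs(std::ifstream& desc_file,
                                     std::vector<mkldnn::memory::desc*>& descs,
                                     size_t count)
{
    for (size_t i = 0; i < count; i++)
    {
        size_t primitive_index;
        desc_file >> primitive_index; // the index was written as formatted text
        // the descriptor itself was written as raw bytes; assumes
        // sizeof(mkldnn::memory::desc) == sizeof(mkldnn_memory_desc_t)
        mkldnn_memory_desc_t raw;
        desc_file.read(reinterpret_cast<char*>(&raw), sizeof(raw));
        descs.push_back(new mkldnn::memory::desc(raw));
    }
}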
// The following functions build the MKLDNN primitive for each type of nGraph Node.
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Add)
{
std::vector<float> scale_vector(2, 1);
std::vector<mkldnn::memory::primitive_desc> inputs_pd;
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, executor::global_cpu_engine));
return mkldnn_emitter.build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Add)
{
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
// Add needs 4 primitives: input0, input1, result, and sum.
index = mkldnn_emitter.reserve_primitive_space_cg(4);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {
input0_data_desc, input1_data_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "std::vector<float> scale_vector(2, 1);\n";
writer << "std::vector<mkldnn::memory::primitive_desc> inputs_pd;\n";
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], "
"cg_ctx->global_cpu_engine));\n";
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], "
"cg_ctx->global_cpu_engine));\n";
// elementwise sum primitive descriptor
writer << "mkldnn::sum::primitive_desc sum_pd = "
"mkldnn::sum::primitive_desc(*cg_ctx->mkldnn_descriptors["
<< desc_index + 2 << "], "
"scale_vector, inputs_pd);\n";
writer << "\n// build sum primitive\n";
writer << "std::vector<mkldnn::memory::primitive::at> inputs_primitive;\n";
//emit_memory_primitive_build(writer, desc_names, deps);
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "]);\n";
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "]);\n";
// sum primitive
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::sum(sum_pd, inputs_primitive, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]);\n";
construct_string = writer.get_code();
}
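For reference, with illustrative slot numbers the construct_string produced above expands to roughly the following generated code (descriptor slots 0-2 and primitive slots 0-3 stand in for whatever reserve_primitive_space_cg actually handed out):

// Approximate expansion of the emitted string for Add (indices illustrative).
std::vector<float> scale_vector(2, 1);
std::vector<mkldnn::memory::primitive_desc> inputs_pd;
inputs_pd.push_back(mkldnn::memory::primitive_desc(
    *cg_ctx->mkldnn_descriptors[0], cg_ctx->global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
    *cg_ctx->mkldnn_descriptors[1], cg_ctx->global_cpu_engine));
mkldnn::sum::primitive_desc sum_pd =
    mkldnn::sum::primitive_desc(*cg_ctx->mkldnn_descriptors[2], scale_vector, inputs_pd);

// build sum primitive
std::vector<mkldnn::memory::primitive::at> inputs_primitive;
inputs_primitive.push_back(*cg_ctx->mkldnn_primitives[0]);
inputs_primitive.push_back(*cg_ctx->mkldnn_primitives[1]);
cg_ctx->mkldnn_primitives[3] =
    new mkldnn::sum(sum_pd, inputs_primitive, *cg_ctx->mkldnn_primitives[2]);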
template <typename OP>
void construct_primitive_build_string_rnn(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
const auto& out = node->get_outputs();
const auto& args = node->get_inputs();
auto rnn_node = static_cast<const OP*>(node);
auto src_sequence_length_max =
static_cast<unsigned long>(rnn_node->get_src_sequence_length());
auto direction = static_cast<unsigned long>(rnn_node->get_direction());
auto num_fused_layers =
static_cast<unsigned long>(rnn_node->get_num_fused_layers());
auto feature_size =
static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
auto rnn_cell_n_gates =
static_cast<unsigned long>(rnn_node->get_gates_per_cell());
auto rnn_cell_n_states =
static_cast<unsigned long>(rnn_node->get_num_cell_states());
auto get_mkldnn_rnn_cell_type = [&]() {
switch (rnn_node->get_rnn_type())
{
case rnn_utils::rnntype::vanilla_rnn:
return std::string("mkldnn::algorithm::vanilla_rnn");
case rnn_utils::rnntype::vanilla_gru:
return std::string("mkldnn::algorithm::vanilla_gru");
case rnn_utils::rnntype::vanilla_lstm:
return std::string("mkldnn::algorithm::vanilla_lstm");
default: throw ngraph_error("unsupported mkldnn rnn algorithm");
}
};
auto get_mkldnn_rnn_direction = [&]() {
switch (direction)
{
case 1:
return std::string("mkldnn::rnn_direction::unidirectional_left2right");
case 2: return std::string("mkldnn::rnn_direction::bidirectional_concat");
default: throw ngraph_error("unsupported mkldnn rnn direction");
}
};
if (out[0].get_shape().size() == 2 &&
(out[0].get_shape()[1] != direction * feature_size))
{
throw ngraph_error(
"input slc{ht} feature size is not equal to output dlc{ht} feature "
"size ");
}
if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
rnn_node->get_num_timesteps() != 1)
{
throw ngraph_error(
"input sic{ht_1|ct_1} feature size is not equal to output "
"dlc{ht_1|ct_1} "
"feature size ");
}
Shape src_layer_tz{
src_sequence_length_max,
batch,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
Shape src_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
Shape wei_layer_tz{
num_fused_layers,
direction,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
rnn_cell_n_gates,
feature_size};
Shape wei_iter_tz{
num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
Shape dst_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
// We create the memory descriptors used by the user
auto src_layer_md = mkldnn_emitter.build_memory_descriptor(
src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
auto src_iter_md = mkldnn_emitter.build_memory_descriptor(
src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
auto wei_layer_md = mkldnn_emitter.build_memory_descriptor(
wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
auto wei_iter_md = mkldnn_emitter.build_memory_descriptor(
wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
auto bias_md = mkldnn_emitter.build_memory_descriptor(
bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
auto dst_layer_md = mkldnn_emitter.build_memory_descriptor(
dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
auto dst_iter_md = mkldnn_emitter.build_memory_descriptor(
dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);
// Lstm/Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
index = mkldnn_emitter.reserve_primitive_space_cg(9, true /* new workspace */);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {src_layer_md,
src_iter_md,
wei_layer_md,
wei_iter_md,
bias_md,
dst_layer_md,
dst_iter_md};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "mkldnn::rnn_cell::desc rnn_cell_desc(" << get_mkldnn_rnn_cell_type()
<< ");\n";
writer << "\n// build lstm/rnn primitive descriptor\n";
writer << "auto rnn_desc = "
"mkldnn::rnn_forward::desc(mkldnn::prop_kind::forward_training, "
"rnn_cell_desc, "
<< get_mkldnn_rnn_direction() << ", "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 2 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 3 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 4 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 5 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 6 << "]);\n";
writer << "auto rnn_prim_desc = mkldnn::rnn_forward::primitive_desc(rnn_desc, "
"cg_ctx->global_cpu_engine);\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(deps[7])
<< "] = new "
"mkldnn::memory({rnn_prim_desc.workspace_primitive_desc().desc(), "
"cg_ctx->global_cpu_engine}, nullptr);\n";
writer << "auto workspace = "
"(char*)malloc(rnn_prim_desc.workspace_primitive_desc().get_size());"
"\n";
writer << "if (!workspace)\n";
writer.block_begin();
writer << "throw std::bad_alloc();\n";
writer.block_end();
writer << "cg_ctx->mkldnn_workspaces.push_back(workspace);\n";
deps[8] = mkldnn_emitter.reserve_workspace();
writer << "\n// build lstm/rnn primitive\n";
// lstm/rnn primitive
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::rnn_forward(rnn_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[5])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[6])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[7]) << "]));\n";
construct_string = writer.get_code();
}
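Two different workspace slots are in play above: deps[7] is an mkldnn memory primitive created with a nullptr data handle, while deps[8] (set via reserve_workspace) records which raw buffer in cg_ctx->mkldnn_workspaces backs it. Summarizing the reservation order:

// dep slots for Lstm/Rnn, in reservation order:
// deps[0]=src_layer, deps[1]=src_iter, deps[2]=weights_layer,
// deps[3]=weights_iter, deps[4]=bias, deps[5]=dst_layer, deps[6]=dst_iter,
// deps[7]=workspace memory primitive (handle bound at runtime),
// deps[8]=index of the malloc'd workspace buffer in cg_ctx->mkldnn_workspaces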
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Lstm)
{
return mkldnn_emitter.build_rnn<Lstm>(node);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Lstm)
{
construct_primitive_build_string_rnn<Lstm>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}

template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Rnn)
{
return mkldnn_emitter.build_rnn<Rnn>(node);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Rnn)
{
construct_primitive_build_string_rnn<Rnn>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <typename OP>
void construct_primitive_build_string_batchnorm(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file,
const bool append_relu,
const bool training)
{
const auto& args = node->get_inputs();
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
index = mkldnn_emitter.reserve_primitive_space_cg(6);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
if (append_relu)
{
writer << "mkldnn::post_ops pops;\n";
writer << "const float ops_scale = 1.f;\n";
writer << "const float ops_alpha = -0.f; // relu negative slope\n";
writer << "const float ops_beta = 0.f;\n";
writer << "pops.append_eltwise("
"ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, "
"ops_beta);\n";
}
else
{
writer << "mkldnn::post_ops pops = mkldnn::post_ops();\n";
}
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
bool use_global_stats;
const mkldnn::memory::desc *mean_desc, *variance_desc;
if (training && args.size() == 3)
{
mean_desc = &mkldnn_utils::get_output_mkldnn_md(node, 1);
variance_desc = &mkldnn_utils::get_output_mkldnn_md(node, 2);
use_global_stats = false;
}
else
{
mean_desc = &mkldnn_utils::get_input_mkldnn_md(node, 3);
variance_desc = &mkldnn_utils::get_input_mkldnn_md(node, 4);
use_global_stats = true;
}
auto batchnorm = static_cast<const OP*>(node);
auto eps = batchnorm->get_eps_value();
writer << "mkldnn::primitive_attr bn_attr;\n";
writer << "bn_attr.set_post_ops(pops);\n";
writer << "\n// build batchnorm primitive descriptor\n";
if (use_global_stats)
{
// Write memory descriptors to file;
std::vector<mkldnn::memory::desc> descs = {
input_desc, *mean_desc, *variance_desc, weights_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "auto batchnorm_desc = "
"mkldnn::batch_normalization_forward::desc(mkldnn::prop_kind::"
"forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift | "
"mkldnn::batch_normalization_flag::use_global_stats);\n";
writer << "auto batchnorm_prim_desc = "
"mkldnn::batch_normalization_forward::primitive_desc(batchnorm_"
"desc, "
"bn_attr, cg_ctx->global_cpu_engine);\n";
writer << "\n// build batchnorm primitive\n";
// batchnorm primitive
writer
<< "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_forward(batchnorm_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "]));\n";
}
else
{
// Write memory descriptors to file;
std::vector<mkldnn::memory::desc> descs = {
input_desc, weights_desc, result_desc, *mean_desc, *variance_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "auto batchnorm_desc = "
"mkldnn::batch_normalization_forward::desc(mkldnn::prop_kind::"
"forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift);\n";
writer << "auto batchnorm_prim_desc = "
"mkldnn::batch_normalization_forward::primitive_desc(batchnorm_"
"desc, "
"bn_attr, cg_ctx->global_cpu_engine);\n";
writer << "\n// build batchnorm primitive\n";
// batchnorm primitive
writer
<< "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_forward(batchnorm_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]), "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "]);\n";
}
construct_string = writer.get_code();
}
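One subtlety: the two branches bind the same five tensors in different orders, matching the two mkldnn::batch_normalization_forward call signatures (with global stats, mean and variance are inputs; in training mode they are outputs). The dep layout follows each branch's descs vector:

// use_global_stats: deps[0]=input, deps[1]=mean, deps[2]=variance,
//                   deps[3]=weights, deps[4]=result
// training:         deps[0]=input, deps[1]=weights, deps[2]=result,
//                   deps[3]=mean,  deps[4]=variance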
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTraining)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInference>(
node, false /*Append relu*/, true /*Training*/);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTraining)
{
construct_primitive_build_string_batchnorm<BatchNormTraining>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
false /*Append relu*/,
true /*Training*/);
}

template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInference)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInference>(
node, false /*Append relu*/, false /*Training*/);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormInference)
{
construct_primitive_build_string_batchnorm<BatchNormInference>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
false /*Append relu*/,
false /*Training*/);
}

template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormTrainingRelu>(
node, true /*Append relu*/, true /*Training*/);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTrainingRelu)
{
construct_primitive_build_string_batchnorm<BatchNormTrainingRelu>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
true /*Append relu*/,
true /*Training*/);
}

template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInferenceRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInferenceRelu>(
node, true /*Append relu*/, false /*Training*/);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormInferenceRelu)
{
construct_primitive_build_string_batchnorm<BatchNormInferenceRelu>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
true /*Append relu*/,
false /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingBackprop)
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTrainingBackprop)
{
const auto& args = node->get_inputs();
const auto* batchnorm = static_cast<const BatchNormTrainingBackprop*>(node);
auto eps = batchnorm->get_eps_value();
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto weights_desc = mkldnn_emitter.build_memory_descriptor(
@@ -141,47 +574,162 @@ namespace ngraph
auto dweights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
const auto* batchnorm = static_cast<const BatchNormTrainingBackprop*>(node);
return mkldnn_emitter.build_batchnorm_backward(weights_desc,
                                               input_desc,
                                               mean_desc,
                                               variance_desc,
                                               delta_desc,
                                               dinput_desc,
                                               dweights_desc,
                                               batchnorm->get_eps_value());

// batchnorm backward needs 8 primitives: weights, input, mean, variance, delta,
// dinput, dweights, and batch_normalization_backward.
index = mkldnn_emitter.reserve_primitive_space_cg(8);
deps = mkldnn_emitter.get_primitive_deps_cg(index);

CodeWriter writer;

// Write memory descriptors to file;
std::vector<mkldnn::memory::desc> descs = {weights_desc,
                                           input_desc,
                                           mean_desc,
                                           variance_desc,
                                           delta_desc,
                                           dinput_desc,
                                           dweights_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "\n// build batchnorm primitives\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_backward("
<< "{{mkldnn::prop_kind::backward, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 4 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift}, "
"cg_ctx->global_cpu_engine, "
"{{mkldnn::prop_kind::forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift}, "
"cg_ctx->global_cpu_engine}}, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[5]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[6]) << "]);\n";
construct_string = writer.get_code();
}
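Note the dep permutation in the generated constructor call: the slots follow the serialized descs order (weights first), while the primitive's constructor, like the removed in-memory builder, takes src, mean, variance, diff_dst, weights, diff_src, diff_weights, hence the deps[1], deps[2], deps[3], deps[4], deps[0], deps[5], deps[6] sequence above:

// dep slot -> tensor, following the serialized descs order:
// deps[0]=weights, deps[1]=input (src), deps[2]=mean, deps[3]=variance,
// deps[4]=delta (diff_dst), deps[5]=dinput (diff_src), deps[6]=dweights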
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Concat)
{
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0, end = node->get_inputs().size(); i < end; i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_dim =
(static_cast<const Concat*>(node))->get_concatenation_axis();
return mkldnn_emitter.build_concat(inputs_data_desc, result_desc, concat_dim);
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Concat)
{
auto concat = static_cast<const ngraph::op::Concat*>(node);
size_t concat_dim = concat->get_concatenation_axis();
size_t nargs = node->get_inputs().size();

// Concat needs number of inputs plus 2 primitives; those two are for result and concat.
index = mkldnn_emitter.reserve_primitive_space_cg(nargs + 2);
deps = mkldnn_emitter.get_primitive_deps_cg(index);

CodeWriter writer;

// Write memory descriptors to file;
std::vector<mkldnn::memory::desc> descs;
for (size_t i = 0; i < nargs; i++)
{
descs.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
descs.push_back(result_desc);
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "std::vector<mkldnn::memory::primitive::at> inputs_primitive;\n";
writer << "std::vector<mkldnn::memory::primitive_desc> inputs_pd;\n";
writer << "for (size_t i = " << desc_index << "; i < " << desc_index + nargs
<< "; i++)\n";
writer.block_begin();
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors[i], "
"cg_ctx->global_cpu_engine));\n";
writer.block_end();
writer << "for (size_t i = " << deps[0] << "; i < " << deps[0] + nargs
<< "; i++)\n";
writer.block_begin();
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives[i]);\n";
writer.block_end();
writer << "auto concat_prim_desc = "
"mkldnn::concat::primitive_desc( "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + nargs << "], "
<< std::to_string(static_cast<int>(concat_dim)) << ", inputs_pd);\n";
writer << "\n// build concat primitive\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::concat(concat_prim_desc, inputs_primitive, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[nargs]) << "]);\n";
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(LRN)
{
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
const auto* lrn = static_cast<const LRN*>(node);
return mkldnn_emitter.build_lrn_forward(input_data_desc,
                                        result_desc,
                                        static_cast<float>(lrn->get_alpha()),
                                        static_cast<float>(lrn->get_beta()),
                                        static_cast<float>(lrn->get_bias()),
                                        static_cast<int>(lrn->get_nsize()));
}

template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(LRN)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

// LRN needs 3 primitives: input, result, and lrn_forward.
index = mkldnn_emitter.reserve_primitive_space_cg(3);
deps = mkldnn_emitter.get_primitive_deps_cg(index);

CodeWriter writer;

// Write memory descriptors to file;
std::vector<mkldnn::memory::desc> descs = {input_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);

const auto* lrn = static_cast<const LRN*>(node);
auto alpha = static_cast<float>(lrn->get_alpha());
auto beta = static_cast<float>(lrn->get_beta());
auto bias = static_cast<float>(lrn->get_bias());
auto nsize = static_cast<int>(lrn->get_nsize());
writer << "auto lrn_desc = "
"mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, "
"mkldnn::algorithm::lrn_across_channels, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << nsize << ", " << alpha << ", " << beta << ", "
<< bias << ");\n";
writer << "auto lrn_prim_desc = "
"mkldnn::lrn_forward::primitive_desc(lrn_desc, "
"cg_ctx->global_cpu_engine);\n";
writer << "\n// build lrn primitive\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::lrn_forward(lrn_prim_desc, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "]"
", *cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "]);\n";
construct_string = writer.get_code();
}
template <>
@@ -198,96 +746,313 @@ namespace ngraph
input_desc, result_desc, lower_bounds, out_shape);
}
template <typename OP>
void construct_primitive_build_string_conv(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
auto convolution = static_cast<const OP*>(node);
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto strides = convolution->get_window_movement_strides();
auto pad_below = convolution->get_padding_below();
auto pad_above = convolution->get_padding_above();
if (mkldnn_emitter.has_bias<OP>())
{
index = mkldnn_emitter.reserve_primitive_space_cg(5);
}
else
{
index = mkldnn_emitter.reserve_primitive_space_cg(4);
}
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
writer << "// Write in memory descriptors\n";
std::vector<mkldnn::memory::desc> descs = {
data_desc, weights_desc, result_desc};
if (mkldnn_emitter.has_bias<OP>())
{
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
descs.insert(descs.begin() + 2, bias_desc);
}
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "\n// build QConv primitive descriptor\n";
writer << "auto conv_desc = "
"mkldnn::convolution_forward::desc(mkldnn::prop_kind::forward,\n"
"mkldnn::algorithm::convolution_direct,\n"
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "],\n"
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "],\n";
if (mkldnn_emitter.has_bias<OP>())
{
writer << "*cg_ctx->mkldnn_descriptors[" << desc_index + 2 << "],\n";
}
writer << "*cg_ctx->mkldnn_descriptors[" << desc_index + (descs.size() - 1)
<< "],\n"
"mkldnn::memory::dims{"
<< std::to_string(strides[0]) << ", " << std::to_string(strides[1]);
if (strides.size() == 3)
{
writer << ", " << std::to_string(strides[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(window_dilation_strides_adjusted[0]) << ", "
<< std::to_string(window_dilation_strides_adjusted[1]);
if (window_dilation_strides_adjusted.size() == 3)
{
writer << ", " << std::to_string(window_dilation_strides_adjusted[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(pad_below[0]) << ", " << std::to_string(pad_below[1]);
if (pad_below.size() == 3)
{
writer << ", " << std::to_string(pad_below[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(pad_above[0]) << ", " << std::to_string(pad_above[1]);
if (pad_above.size() == 3)
{
writer << ", " << std::to_string(pad_above[2]);
}
writer << "},\n"
"mkldnn::padding_kind::zero);\n";
writer << "mkldnn::post_ops ops;\n";
if (std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionAdd>())
{
writer << "ops.append_sum(1.f);\n";
}
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scales_size = shape_size(convolution->get_input_shape(5));
const element::Type& et = node->get_input_element_type(5);
std::string type = et.c_type_string();
std::stringstream ss;
writer << "std::vector<float> dyn_post_op_scales;\n";
auto c = std::dynamic_pointer_cast<ngraph::op::Constant>(
node->get_arguments()[5]);
if (c)
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node,
5);
writer << "dyn_post_op_scales.push_back("
<< std::to_string(sum_scale_val[0]) << ");\n";
}
else
{
ss << "((" << type << "*)(pool_base_ptr + "
<< node->get_inputs()[5].get_tensor().get_pool_offset() << "))";
writer << "dyn_post_op_scales.assign(" << ss.str() << ", " << ss.str()
<< " + " << std::to_string(sum_scales_size) << ");\n";
}
writer << "ops.append_sum(dyn_post_op_scales[0]);\n";
}
if (has_relu<OP>(node))
{
writer << "const float ops_scale = 1.f;\n";
writer << "const float ops_alpha = -0.f; // relu negative slope\n";
writer << "const float ops_beta = 0.f;\n";
writer << "ops.append_eltwise("
"ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, "
"ops_beta);\n";
}
writer << "mkldnn::primitive_attr conv_attr;\n";
writer << "conv_attr.set_post_ops(ops);\n";
if (mkldnn_emitter.is_quantized_conv<OP>())
{
auto scale_index = mkldnn_emitter.get_scale_index<OP>();
auto c = std::dynamic_pointer_cast<ngraph::op::Constant>(
node->get_arguments()[scale_index]);
auto scales_size = shape_size(convolution->get_input_shape(scale_index));
const element::Type& et = node->get_input_element_type(scale_index);
std::string type = et.c_type_string();
std::stringstream ss;
if (c)
{
ss << "((" << type << "*)(" << c->get_data_ptr() << "))";
}
else
{
ss << "((" << type << "*)(pool_base_ptr + "
<< node->get_inputs()[scale_index].get_tensor().get_pool_offset()
<< "))";
}
writer << "std::vector<float> dyn_scales;\n";
writer << "dyn_scales.assign(" << ss.str() << ", " << ss.str() << " + "
<< std::to_string(scales_size) << ");\n";
writer << "// use channelwise scales (dim 1, mask = 1 << 1) when dyn_scales "
          "has more than one element\n";
writer << "const int mask = " << std::to_string(scales_size)
<< " == 1 ? 0 : 2;\n";
writer << "conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_"
"nearest);\n";
writer << "conv_attr.set_output_scales(mask, dyn_scales);\n";
}
writer << "mkldnn::primitive* prim;\n";
if (mkldnn_emitter.has_bias<OP>())
{
writer << "prim = new mkldnn::convolution_forward({conv_desc, conv_attr, "
"cg_ctx->global_cpu_engine},"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "]);\n";
}
else
{
writer << "prim = new mkldnn::convolution_forward({conv_desc, conv_attr, "
"cg_ctx->global_cpu_engine},"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]);\n";
}
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = prim;\n";
construct_string = writer.get_code();
}
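The quantized-output-scale handling above is the subtle part, so here is a minimal standalone sketch of the same attribute setup, assuming mkldnn 0.x semantics (mask 0 = one common scale for the whole tensor, mask 2 = per-output-channel scales along dim 1); the function name is an illustration, not an API in this codebase:

#include <mkldnn.hpp>
#include <vector>

// Sketch: mirrors the conv_attr setup emitted by the generated code above.
void set_conv_output_scales(mkldnn::primitive_attr& conv_attr,
                            const std::vector<float>& dyn_scales)
{
    // One scale -> mask 0 (applied to the whole tensor); several scales ->
    // mask 2 (bit 1 set, i.e. channelwise along output dim 1).
    const int mask = dyn_scales.size() == 1 ? 0 : 2;
    conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
    conv_attr.set_output_scales(mask, dyn_scales);
}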
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionRelu) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Convolution)
{ {
return mkldnn_emitter.build_convolution<ConvolutionRelu>(node); construct_primitive_build_string_conv<Convolution>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
} }
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionRelu) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolution)
{ {
return mkldnn_emitter.build_convolution<QuantizedConvolutionRelu>(node); construct_primitive_build_string_conv<QuantizedConvolution>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
} }
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolution) void
MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionRelu)
{ {
return mkldnn_emitter.build_convolution<QuantizedConvolution>(node); construct_primitive_build_string_conv<ConvolutionRelu>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
} }
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolution) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolutionRelu)
{ {
Strides window_dilation_strides_adjusted; construct_primitive_build_string_conv<QuantizedConvolutionRelu>(
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node); mkldnn_emitter, node, construct_string, deps, index, desc_file);
for (size_t s : convolution->get_window_dilation_strides()) }
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); template <>
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); void
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionBias)
auto padding_below = convolution->get_padding_below(); {
auto padding_above = convolution->get_padding_above(); construct_primitive_build_string_conv<ConvolutionBias>(
auto filter_strides = convolution->get_window_movement_strides(); mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
return mkldnn_emitter.build_convolution_forward( template <>
input_data_desc, void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
weights_desc, QuantizedConvolutionBias)
result_desc, {
filter_strides, construct_primitive_build_string_conv<QuantizedConvolutionBias>(
window_dilation_strides_adjusted, mkldnn_emitter, node, construct_string, deps, index, desc_file);
padding_below,
padding_above);
} }
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolutionBias) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
ConvolutionBiasAdd)
{ {
Strides window_dilation_strides_adjusted; construct_primitive_build_string_conv<ConvolutionBiasAdd>(
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node); mkldnn_emitter, node, construct_string, deps, index, desc_file);
for (size_t s : convolution->get_window_dilation_strides()) }
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); template <>
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); QuantizedConvolutionBiasAdd)
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); {
auto padding_below = convolution->get_padding_below(); construct_primitive_build_string_conv<QuantizedConvolutionBiasAdd>(
auto padding_above = convolution->get_padding_above(); mkldnn_emitter, node, construct_string, deps, index, desc_file);
auto filter_strides = convolution->get_window_movement_strides(); }
const float ops_scale = 1.f; template <>
const float ops_alpha = -0.f; // relu negative slope void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionAdd)
const float ops_beta = 0.f; {
construct_primitive_build_string_conv<ConvolutionAdd>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
mkldnn::post_ops ops; template <>
if (convolution->with_relu()) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
{ QuantizedConvolutionBiasSignedAdd)
ops.append_eltwise( {
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta); construct_primitive_build_string_conv<QuantizedConvolutionBiasSignedAdd>(
} mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
return mkldnn_emitter.build_convolution_forward( template <>
input_data_desc, void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
weights_desc, GroupConvolution)
bias_desc, {
result_desc, construct_primitive_build_string_conv<GroupConvolution>(
filter_strides, mkldnn_emitter, node, construct_string, deps, index, desc_file);
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
} }
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Convolution) void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
GroupConvolutionBias)
{ {
return mkldnn_emitter.build_convolution<Convolution>(node); construct_primitive_build_string_conv<GroupConvolutionBias>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
} }
template <typename OpTy> template <typename OpTy>
...@@ -370,44 +1135,6 @@ namespace ngraph ...@@ -370,44 +1135,6 @@ namespace ngraph
node); node);
} }
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBias)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
QuantizedConvolutionBiasSignedAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasSignedAdd>(
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBias)
{
return mkldnn_emitter.build_convolution<ConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionAdd>(node);
}
template <> template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL( size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
ConvolutionBiasBackpropFiltersBias) ConvolutionBiasBackpropFiltersBias)
...@@ -717,25 +1444,12 @@ using namespace ngraph::runtime::cpu::pass; ...@@ -717,25 +1444,12 @@ using namespace ngraph::runtime::cpu::pass;
#define TI(x) std::type_index(typeid(x)) #define TI(x) std::type_index(typeid(x))
static const PrimitiveBuildOpMap prim_build_dispatcher{ static const PrimitiveBuildOpMap prim_build_dispatcher{
{TI(Add), &MKLDNNPrimitiveBuildPass::build_primitive<Add>},
{TI(Concat), &MKLDNNPrimitiveBuildPass::build_primitive<Concat>},
{TI(Convert), &MKLDNNPrimitiveBuildPass::build_primitive<Convert>}, {TI(Convert), &MKLDNNPrimitiveBuildPass::build_primitive<Convert>},
{TI(runtime::cpu::op::ConvertLayout), {TI(runtime::cpu::op::ConvertLayout),
&MKLDNNPrimitiveBuildPass::build_primitive<runtime::cpu::op::ConvertLayout>}, &MKLDNNPrimitiveBuildPass::build_primitive<runtime::cpu::op::ConvertLayout>},
{TI(AvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPool>}, {TI(AvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPool>},
{TI(AvgPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPoolBackprop>}, {TI(AvgPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPoolBackprop>},
{TI(BatchNormTraining), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTraining>},
{TI(BatchNormInference), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInference>},
{TI(BoundedRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BoundedRelu>}, {TI(BoundedRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BoundedRelu>},
{TI(BatchNormTrainingBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingBackprop>},
{TI(Convolution), &MKLDNNPrimitiveBuildPass::build_primitive<Convolution>},
{TI(GroupConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolution>},
{TI(ConvolutionRelu), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionRelu>},
{TI(ConvolutionBiasAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasAdd>},
{TI(BatchNormTrainingRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingRelu>},
{TI(BatchNormInferenceRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInferenceRelu>},
{TI(ConvolutionBackpropData), {TI(ConvolutionBackpropData),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBackpropData>}, &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBackpropData>},
{TI(ConvolutionBackpropFilters), {TI(ConvolutionBackpropFilters),
...@@ -745,44 +1459,108 @@ static const PrimitiveBuildOpMap prim_build_dispatcher{ ...@@ -745,44 +1459,108 @@ static const PrimitiveBuildOpMap prim_build_dispatcher{
{TI(MaxPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolBackprop>}, {TI(MaxPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolBackprop>},
{TI(MaxPoolWithIndicesBackprop), {TI(MaxPoolWithIndicesBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolWithIndicesBackprop>}, &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolWithIndicesBackprop>},
{TI(ConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBias>},
{TI(QuantizedConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolution>},
{TI(ConvolutionBiasBackpropFiltersBias), {TI(ConvolutionBiasBackpropFiltersBias),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasBackpropFiltersBias>}, &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasBackpropFiltersBias>},
{TI(LRN), &MKLDNNPrimitiveBuildPass::build_primitive<LRN>},
{TI(Relu), &MKLDNNPrimitiveBuildPass::build_primitive<Relu>}, {TI(Relu), &MKLDNNPrimitiveBuildPass::build_primitive<Relu>},
{TI(ReluBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<ReluBackprop>}, {TI(ReluBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<ReluBackprop>},
{TI(LeakyRelu), &MKLDNNPrimitiveBuildPass::build_primitive<LeakyRelu>}, {TI(LeakyRelu), &MKLDNNPrimitiveBuildPass::build_primitive<LeakyRelu>},
{TI(Sigmoid), &MKLDNNPrimitiveBuildPass::build_primitive<Sigmoid>}, {TI(Sigmoid), &MKLDNNPrimitiveBuildPass::build_primitive<Sigmoid>},
{TI(SigmoidBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<SigmoidBackprop>}, {TI(SigmoidBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<SigmoidBackprop>},
{TI(Lstm), &MKLDNNPrimitiveBuildPass::build_primitive<Lstm>},
{TI(Rnn), &MKLDNNPrimitiveBuildPass::build_primitive<Rnn>},
{TI(QuantizedMaxPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedMaxPool>}, {TI(QuantizedMaxPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedMaxPool>},
{TI(QuantizedAvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedAvgPool>}, {TI(QuantizedAvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedAvgPool>},
{TI(Softmax), &MKLDNNPrimitiveBuildPass::build_primitive<Softmax>}, {TI(Softmax), &MKLDNNPrimitiveBuildPass::build_primitive<Softmax>},
{TI(Slice), &MKLDNNPrimitiveBuildPass::build_primitive<Slice>}, {TI(Slice), &MKLDNNPrimitiveBuildPass::build_primitive<Slice>},
{TI(ReplaceSlice), &MKLDNNPrimitiveBuildPass::build_primitive<ReplaceSlice>}, {TI(ReplaceSlice), &MKLDNNPrimitiveBuildPass::build_primitive<ReplaceSlice>},
{TI(UpdateSlice), &MKLDNNPrimitiveBuildPass::build_primitive<UpdateSlice>}, {TI(UpdateSlice), &MKLDNNPrimitiveBuildPass::build_primitive<UpdateSlice>},
{TI(ConvolutionAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionAdd>},
{TI(QuantizedConvolutionRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionRelu>},
{TI(QuantizedConvolutionBias),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBias>},
{TI(QuantizedConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasAdd>},
{TI(QuantizedConvolutionBiasSignedAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasSignedAdd>},
{TI(GroupConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolutionBias>},
{TI(Quantize), &MKLDNNPrimitiveBuildPass::build_primitive<Quantize>}, {TI(Quantize), &MKLDNNPrimitiveBuildPass::build_primitive<Quantize>},
{TI(Dequantize), &MKLDNNPrimitiveBuildPass::build_primitive<Dequantize>}, {TI(Dequantize), &MKLDNNPrimitiveBuildPass::build_primitive<Dequantize>},
{TI(QuantizedConcat), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConcat>}, {TI(QuantizedConcat), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConcat>},
{TI(GetOutputElement), &MKLDNNPrimitiveBuildPass::build_primitive<GetOutputElement>}, {TI(GetOutputElement), &MKLDNNPrimitiveBuildPass::build_primitive<GetOutputElement>},
}; };
static const PrimitiveBuildStringConstructOpMap prim_build_string_construct_dispatcher{
{TI(Add), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Add>},
{TI(Concat), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Concat>},
{TI(BatchNormInference),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormInference>},
{TI(BatchNormTraining),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTraining>},
{TI(BatchNormInferenceRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormInferenceRelu>},
{TI(BatchNormTrainingRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTrainingRelu>},
{TI(BatchNormTrainingBackprop),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTrainingBackprop>},
{TI(LRN), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<LRN>},
{TI(Lstm), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Lstm>},
{TI(Rnn), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Rnn>},
{TI(Convolution), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Convolution>},
{TI(ConvolutionRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionRelu>},
{TI(ConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionBias>},
{TI(ConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionBiasAdd>},
{TI(ConvolutionAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionAdd>},
{TI(GroupConvolution),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<GroupConvolution>},
{TI(GroupConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<GroupConvolutionBias>},
{TI(QuantizedConvolution),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolution>},
{TI(QuantizedConvolutionRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionRelu>},
{TI(QuantizedConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionBias>},
{TI(QuantizedConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionBiasAdd>},
{TI(QuantizedConvolutionBiasSignedAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<
QuantizedConvolutionBiasSignedAdd>},
};
// Check whether the node builds its primitives at the first iteration.
// Needed during the transition while two dispatch maps coexist.
static bool in_new_map(const std::shared_ptr<Node>& node)
{
if (std::dynamic_pointer_cast<ngraph::op::Add>(node) ||
std::dynamic_pointer_cast<ngraph::op::Concat>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormInference>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTraining>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormInferenceRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTrainingRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTrainingBackprop>(node) ||
std::dynamic_pointer_cast<ngraph::op::LRN>(node) ||
std::dynamic_pointer_cast<ngraph::op::Lstm>(node) ||
std::dynamic_pointer_cast<ngraph::op::Rnn>(node) ||
std::dynamic_pointer_cast<ngraph::op::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionBias>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionBiasAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBias>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBiasAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBiasSignedAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(node))
{
return true;
}
return false;
}
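This cast chain has to stay in lockstep with prim_build_string_construct_dispatcher during the transition; a sketch of an equivalent table-driven check (it assumes, as holds in this file, that the dispatcher map is defined above this point):

// Sketch: derive the membership test from the new dispatch table itself, so
// the transition list lives in exactly one place. TI is the macro defined above.
static bool in_new_map(const std::shared_ptr<Node>& node)
{
    return prim_build_string_construct_dispatcher.count(TI(*node)) != 0;
}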
bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
{ {
for (const auto& shp_node : nodes) for (const auto& shp_node : nodes)
{ {
if (in_new_map(shp_node))
{
continue;
}
Node* node = shp_node.get(); Node* node = shp_node.get();
if (mkldnn_utils::use_mkldnn_kernel(node)) if (mkldnn_utils::use_mkldnn_kernel(node))
...@@ -798,6 +1576,33 @@ bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr ...@@ -798,6 +1576,33 @@ bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr
} }
} }
std::ofstream desc_file(m_desc_filename, std::ios::out | std::ios::binary);
for (const auto& shp_node : nodes)
{
if (!in_new_map(shp_node))
{
continue;
}
Node* node = shp_node.get();
if (mkldnn_utils::use_mkldnn_kernel(node))
{
auto handler = prim_build_string_construct_dispatcher.find(TI(*node));
NGRAPH_CHECK(handler != prim_build_string_construct_dispatcher.end(),
"Unsupported node '",
node->description(),
"' in MKLDNNPrimitiveBuildPass");
std::string construct_string;
std::vector<size_t> deps;
size_t index;
handler->second(m_mkldnn_emitter, node, construct_string, deps, index, desc_file);
m_node_primitive_string_deps_index_map[node] =
std::tuple<std::string, std::vector<size_t>, size_t>(construct_string, deps, index);
}
}
return false; return false;
} }
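While the two code paths coexist, it can help to inspect what the second loop collected; a debugging sketch (not part of this change; the std::get indices follow the tuple layout <string, deps, index> used above):

#include <iostream>

// Sketch: dump each node's primitive index, dep count, and generated build string.
static void dump_primitive_build_map(
    const std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>& m)
{
    for (const auto& kv : m)
    {
        std::cout << kv.first->get_name() << " -> primitive index "
                  << std::get<2>(kv.second) << ", "
                  << std::get<1>(kv.second).size() << " deps\n"
                  << std::get<0>(kv.second) << "\n";
    }
}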
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "ngraph/pass/pass.hpp" #include "ngraph/pass/pass.hpp"
#include <fstream>
#include <functional> #include <functional>
#include <typeindex> #include <typeindex>
#include <unordered_map> #include <unordered_map>
...@@ -26,6 +27,15 @@ ...@@ -26,6 +27,15 @@
build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \ build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \
ngraph::Node * node) ngraph::Node * node)
#define CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(op_name) \
construct_primitive_build_string<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & \
mkldnn_emitter, \
ngraph::Node * node, \
std::string & construct_string, \
std::vector<size_t> & deps, \
size_t & index, \
std::ofstream & desc_file)
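For readers tracing the specializations in the source file: CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(LRN), for example, expands so that a specialization reads roughly as follows (whitespace tidied, LRN chosen as an example):

template <>
void MKLDNNPrimitiveBuildPass::construct_primitive_build_string<LRN>(
    ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
    ngraph::Node* node,
    std::string& construct_string,
    std::vector<size_t>& deps,
    size_t& index,
    std::ofstream& desc_file)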
namespace mkldnn namespace mkldnn
{ {
class primitive; class primitive;
...@@ -48,23 +58,46 @@ namespace ngraph ...@@ -48,23 +58,46 @@ namespace ngraph
using PrimitiveBuildOpMap = using PrimitiveBuildOpMap =
std::unordered_map<std::type_index, PrimitiveBuildFunction>; std::unordered_map<std::type_index, PrimitiveBuildFunction>;
using PrimitiveBuildStringConstructFunction =
std::function<void(ngraph::runtime::cpu::MKLDNNEmitter&,
ngraph::Node*,
std::string&,
std::vector<size_t>&,
size_t&,
std::ofstream&)>;
using PrimitiveBuildStringConstructOpMap =
std::unordered_map<std::type_index, PrimitiveBuildStringConstructFunction>;
/// This pass traverses the call graph and creates MKLDNN primitives for those ops /// This pass traverses the call graph and creates MKLDNN primitives for those ops
/// that have been assigned to MKLDNN. /// that have been assigned to MKLDNN.
class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass
{ {
private: private:
std::string m_desc_filename;
ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter; ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter;
/// External map to store each node with mkldnn implementation and its mkldnn /// External map to store each node with mkldnn implementation and its mkldnn
/// associated primitive index. /// associated primitive index.
std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map; std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map;
/// External map to store each node with an mkldnn implementation and its
/// primitive-build string, dependencies, and mkldnn primitive index.
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
m_node_primitive_string_deps_index_map;
public: public:
MKLDNNPrimitiveBuildPass( MKLDNNPrimitiveBuildPass(
std::string filename,
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter, ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
std::unordered_map<const Node*, size_t>& node_primitive_idx_map) std::unordered_map<const Node*, size_t>& node_primitive_idx_map,
: m_mkldnn_emitter(mkldnn_emitter) std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
node_primitive_string_deps_index_map)
: m_desc_filename(filename)
, m_mkldnn_emitter(mkldnn_emitter)
, m_node_primitive_idx_map(node_primitive_idx_map) , m_node_primitive_idx_map(node_primitive_idx_map)
, m_node_primitive_string_deps_index_map(
node_primitive_string_deps_index_map)
{ {
} }
...@@ -78,6 +111,19 @@ namespace ngraph ...@@ -78,6 +111,19 @@ namespace ngraph
throw std::runtime_error("Unimplemented op '" + node->description() + throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass"); "' in MKLDNNPrimitiveBuildPass");
} }
template <typename OP>
static void construct_primitive_build_string(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass");
}
}; };
} }
} }
......
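Given the widened constructor above, the call site (not shown in this diff) would construct the pass roughly like this; the file name and variable names are assumptions:

// Sketch: constructing the pass with the descriptor file name and both maps.
MKLDNNPrimitiveBuildPass pass("mkldnn_descs.bin",
                              mkldnn_emitter,
                              node_primitive_idx_map,
                              node_primitive_string_deps_index_map);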
...@@ -26,8 +26,35 @@ struct CPURuntimeContextCG ...@@ -26,8 +26,35 @@ struct CPURuntimeContextCG
std::unique_ptr<tbb::flow::graph> tbb_graph; std::unique_ptr<tbb::flow::graph> tbb_graph;
std::unique_ptr<tbb::global_control> tbb_gcontrol; std::unique_ptr<tbb::global_control> tbb_gcontrol;
CPURuntimeContextCG() { init_tbb(); } CPURuntimeContextCG() { init_tbb(); init_mkldnn_primitives(); }
~CPURuntimeContextCG() { cleanup_tbb(); } ~CPURuntimeContextCG() { cleanup_tbb(); cleanup_mkldnn_primitives(); cleanup_mkldnn_descriptors(); }
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<char*> mkldnn_workspaces;
std::vector<mkldnn::memory::desc*> mkldnn_descriptors;
mkldnn::engine global_cpu_engine = mkldnn::engine(mkldnn::engine::cpu, 0);
void set_memory_ptr(size_t primitive_index, void* ptr)
{
auto primitive = static_cast<mkldnn::memory*>(mkldnn_primitives[primitive_index]);
primitive->set_data_handle(ptr);
}
void mkldnn_invoke_primitive(size_t primitive_index)
{
mkldnn::stream s(mkldnn::stream::kind::eager);
try
{
s.submit({*mkldnn_primitives[primitive_index]}).wait();
}
catch (const mkldnn::error& e)
{
throw std::runtime_error("Could not run mkldnn primitive " + e.message);
}
}
private: private:
inline void init_tbb() inline void init_tbb()
...@@ -59,6 +86,35 @@ private: ...@@ -59,6 +86,35 @@ private:
} }
} }
} }
void init_mkldnn_primitives();
inline void cleanup_mkldnn_primitives()
{
for (auto p : mkldnn_primitives)
{
delete p;
}
#ifndef _WIN32
// To avoid memory leaks in mkldnn, release any buffers that have not been freed yet.
// https://software.intel.com/en-us/mkl-linux-developer-guide-avoiding-memory-leaks-in-intel-mkl
// mkl_free_buffers() is not exposed at this point, hence the use of mkl_serv_free_buffers().
ngraph::runtime::cpu::mkldnn_utils::mkl_serv_free_buffers();
#endif
for (auto w : mkldnn_workspaces)
{
free(w);
}
}
inline void cleanup_mkldnn_descriptors()
{
for (auto d : mkldnn_descriptors)
{
free(d);
}
}
}; };
extern "C" CPURuntimeContextCG* init_cg_ctx() extern "C" CPURuntimeContextCG* init_cg_ctx()
...@@ -70,4 +126,25 @@ extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx) ...@@ -70,4 +126,25 @@ extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx)
{ {
delete cg_ctx; delete cg_ctx;
} }
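Taken together, the exported entry points and the context methods above imply roughly this lifecycle from the generated code's side; a sketch with illustrative indices, assuming the primitives were already built on the first iteration:

// Sketch: bind a tensor buffer to a memory primitive, then run a compute
// primitive; indices 0 and 5 are made up for illustration.
CPURuntimeContextCG* cg_ctx = init_cg_ctx();
float src_buffer[64] = {0};
cg_ctx->set_memory_ptr(0, src_buffer); // memory primitive 0 <- src buffer
cg_ctx->mkldnn_invoke_primitive(5);    // execute compute primitive 5
destroy_cg_ctx(cg_ctx);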
static void
deserialize_memory_descs_and_build_memory_primitives(std::ifstream& desc_file,
CPURuntimeContextCG* cg_ctx,
size_t descs_count)
{
cg_ctx->mkldnn_descriptors = std::vector<mkldnn::memory::desc*>(descs_count);
for (size_t i = 0; i < descs_count; i++)
{
size_t primitive_index;
desc_file >> primitive_index;
auto desc = (mkldnn::memory::desc*)malloc(sizeof(mkldnn::memory::desc));
if (!desc)
{
throw std::bad_alloc();
}
desc_file.read(reinterpret_cast<char*>(desc), sizeof(mkldnn::memory::desc));
cg_ctx->mkldnn_descriptors[i] = desc;
cg_ctx->mkldnn_primitives[primitive_index] = new mkldnn::memory(
    {*cg_ctx->mkldnn_descriptors[i], cg_ctx->global_cpu_engine}, nullptr);
}
}
)" )"
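On the consuming side, the deserializer would be driven roughly like this; the file name and counts are assumptions, and the format (a text index followed by a raw mkldnn::memory::desc) mirrors the read loop above:

// Sketch: rebuild memory descriptors and placeholder memory primitives from
// the descriptor file written by the build pass at compile time.
// primitive_count and descs_count are assumed to be known to the caller.
std::ifstream desc_file("mkldnn_descs.bin", std::ios::in | std::ios::binary);
cg_ctx->mkldnn_primitives.resize(primitive_count); // must be pre-sized
deserialize_memory_descs_and_build_memory_primitives(desc_file, cg_ctx, descs_count);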