Commit e8b5d11b authored by Amy Zhuang, committed by Robert Kimball

Create mkldnn primitives at first iteration for codegen - part1. (#2806)

* Create mkldnn primitives at first iteration for CODEGEN.

 OPs: add, lstm, and rnn.

*  OPs: batchnorm.

*  OPs: concat and lrn.

Remove dead code.

* Skip in place concat, relu, reshape, and slice when building node_primitive_string_deps_index map.

* Change NGRAPH_ASSERT to NGRAPH_CHECK.

* Ops: Qconv

* Ops: Convs

* Address PR Feedback.

* Dynamic scale support for qconvs

* Updating to Amy's recent change.

* GroupConv and cleaning dead code.

* Address PR Feedback.

* Remove unused variable.

* Fix a bug.

* Fix style error.
parent d77ace68
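
Note for readers of this diff: the change defers MKLDNN primitive construction in the CODEGEN path to the first invocation of the compiled function (guarded by ctx->first_iteration); every later call only rebinds memory pointers and invokes the already-built primitive through cg_ctx. A minimal, self-contained analogue of that pattern, using simplified stand-in types rather than the real nGraph CPU runtime classes, is:

// --- illustrative sketch, not part of the commit ---
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

struct RuntimeContext
{
    bool first_iteration = true;
    std::vector<std::function<void()>> primitives; // stand-in for mkldnn primitives
};

// Shape of the per-op code the emitter now generates: build once, invoke every call.
void run_op(RuntimeContext& ctx, std::size_t index)
{
    if (ctx.first_iteration)
    {
        // analogous to the primitive-build string emitted inside the guard
        ctx.primitives[index] = [index] { std::printf("invoke primitive %zu\n", index); };
    }
    ctx.primitives[index](); // analogous to cg_ctx->mkldnn_invoke_primitive(index)
}

int main()
{
    RuntimeContext ctx;
    ctx.primitives.resize(1);
    run_op(ctx, 0);              // first call builds, then invokes
    ctx.first_iteration = false;
    run_op(ctx, 0);              // later calls only invoke
}
// --- end sketch ---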
......@@ -62,7 +62,7 @@ namespace ngraph
auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward.
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto lstm_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
......
......@@ -57,7 +57,7 @@ namespace ngraph
auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
// Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward.
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto rnn_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
......
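
For orientation: the nine reserved slots line up with the dependency list the emitters index below (deps[0..6] for the seven named memory primitives, deps[7] for the workspace memory primitive, deps[8] for the workspace buffer id, and the returned index for the rnn_forward primitive itself). A small stand-alone sketch of that reservation arithmetic, assuming it mirrors reserve_primitive_space_cg shown further down in this diff, is:

// --- illustrative sketch, not part of the commit ---
#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

struct FakeEmitter // simplified stand-in for MKLDNNEmitter's bookkeeping
{
    std::vector<void*> primitives;                         // slots for mkldnn primitives
    std::map<std::size_t, std::vector<std::size_t>> deps;  // primitive index -> dependencies

    std::size_t reserve(std::size_t count, bool new_workspace)
    {
        std::size_t base = primitives.size();
        primitives.resize(base + count, nullptr);
        std::size_t index = primitives.size() - 1;   // last slot: the op primitive itself
        for (std::size_t i = 0; i + 1 < count; i++)
        {
            deps[index].push_back(base + i);         // preceding slots: its memory primitives
        }
        if (new_workspace)
        {
            deps[index].push_back(0);                // placeholder workspace buffer id
        }
        return index;
    }
};

int main()
{
    FakeEmitter e;
    std::size_t lstm_index = e.reserve(9, true);     // as in the Lstm/Rnn emitters
    assert(lstm_index == 8);                         // slot of the rnn_forward primitive
    assert(e.deps[lstm_index].size() == 9);          // deps[0..7] memories, deps[8] workspace buffer
}
// --- end sketch ---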
......@@ -155,25 +155,41 @@ namespace ngraph
{
namespace cpu
{
static void emit_build_primitives(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
CodeWriter& writer,
size_t& index,
std::vector<std::size_t>& deps)
{
writer << "if (ctx->first_iteration)\n";
writer.block_begin();
// get the string, deps, and index from the map
writer << get<0>(external_function->get_primitive_build_tuple(node));
writer.block_end();
deps = get<1>(external_function->get_primitive_build_tuple(node));
index = get<2>(external_function->get_primitive_build_tuple(node));
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Add)
{
writer.block_begin();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t add_index = external_function->get_primitive_index(node);
size_t add_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, add_index, deps);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(add_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(add_index) << ");\n";
}
else
{
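
Assembled, the writer calls above emit code of roughly this shape for an MKLDNN Add node (a sketch only; the dependency indices, the tensor names arg0_tensor/arg1_tensor/out0_tensor, and the build string are placeholders filled in per node at compile time):

// --- sketch of the generated output, not verbatim ---
if (ctx->first_iteration)
{
    /* primitive-build string from m_node_primitive_string_deps_index_map:
       constructs the sum primitive and its three memory primitives once */
}
cg_ctx->set_memory_ptr(0, arg0_tensor);   // deps[0] -> first input
cg_ctx->set_memory_ptr(1, arg1_tensor);   // deps[1] -> second input
cg_ctx->set_memory_ptr(2, out0_tensor);   // deps[2] -> output
cg_ctx->mkldnn_invoke_primitive(3);       // reserved index for this Add
// --- end sketch ---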
......@@ -517,56 +533,55 @@ namespace ngraph
throw ngraph_error(
"Lstm op doesnt have the required number of inputs to emit MKLDNN kernel");
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
size_t lstm_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, lstm_index, deps);
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< args[3].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
<< args[4].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6]) << ", "
<< out[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[7])
<< ", ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[7])
<< ", cg_ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(lstm_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(lstm_index) << ");\n";
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Rnn)
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
size_t rnn_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, rnn_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< args[3].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
<< args[4].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6]) << ", "
<< out[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[7])
<< ", ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, " << to_string(rnn_index)
<< ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[7])
<< ", cg_ctx->mkldnn_workspaces[" << deps[8] << "]);\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(rnn_index) << ");\n";
}
template <typename T>
......@@ -588,52 +603,41 @@ namespace ngraph
<< args[1].get_name() << ", "
<< args[1].get_size() * args[1].get_element_type().size() << ");\n";
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (append_relu)
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
size_t batchnorm_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, batchnorm_index, deps);
if (training && args.size() == 3)
{
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1])
<< ", bn_weights.data());\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4])
<< ", " << out[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
<< out[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(batchnorm_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
<< ");\n";
}
else
{
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[3].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[4].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[3].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[4].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3])
<< ", bn_weights.data());\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4])
<< ", " << out[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(batchnorm_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
<< ");\n";
}
writer.block_end();
}
......@@ -741,27 +745,27 @@ namespace ngraph
<< args[1].get_name() << ", "
<< args[1].get_size() * args[1].get_element_type().size() << ");\n";
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
size_t batchnorm_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, batchnorm_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0])
<< ", bn_weights.data());\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[3].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< args[4].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[4]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[4]) << ", "
<< args[5].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[5]) << ", "
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[5]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[6])
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[6])
<< ", bn_dweights.data());\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(batchnorm_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(batchnorm_index)
<< ");\n";
writer << "memcpy(" << out[1].get_name() << ", &bn_dweights[0], "
<< args[0].get_size() * args[0].get_element_type().size() << ");\n";
......@@ -982,21 +986,21 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t concat_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
size_t concat_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, concat_index, deps);
size_t i;
for (i = 0; i < args.size(); i++)
{
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
<< ", " << args[i].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[i]) << ", "
<< args[i].get_name() << ");\n";
}
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[i])
<< ", " << out[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[i]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(concat_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(concat_index)
<< ");\n";
}
else
{
......@@ -1150,17 +1154,16 @@ namespace ngraph
writer.block_begin();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lrn_index = external_function->get_primitive_index(node);
size_t lrn_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, lrn_index, deps);
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << out[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(lrn_index) << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(lrn_index) << ");\n";
}
else
{
......@@ -1984,18 +1987,17 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
}
......@@ -2004,18 +2006,17 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2028,18 +2029,17 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2050,26 +2050,20 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolution)
{
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
// invoke group convolution
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2080,29 +2074,22 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolutionBias)
{
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t conv_index = external_function->get_primitive_index(node);
// invoke group convolution bias
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
// invoke group convolution
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2121,19 +2108,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2291,21 +2276,19 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(qconv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2319,26 +2302,24 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
writer.block_begin();
writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
<< args[3].get_size() * args[3].get_element_type().size() << ");\n";
writer.block_end();
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(qconv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2352,26 +2333,24 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
writer.block_begin();
writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
<< args[3].get_size() * args[3].get_element_type().size() << ");\n";
writer.block_end();
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(qconv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2446,21 +2425,19 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2473,25 +2450,24 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
writer.block_begin();
writer << "memcpy(" << out[0].get_name() << ", " << args[3].get_name() << ", "
<< args[3].get_size() * args[3].get_element_type().size() << ");\n";
writer.block_end();
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......@@ -2504,23 +2480,24 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
size_t conv_index;
std::vector<std::size_t> deps;
emit_build_primitives(external_function, node, writer, conv_index, deps);
writer << "if (" << out[0].get_name() << " != " << args[2].get_name() << ")\n";
writer.block_begin();
writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", "
<< args[2].get_size() * args[2].get_element_type().size() << ");\n";
writer.block_end();
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[1]) << ", "
<< args[1].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[2]) << ", "
<< args[2].get_name() << ");\n";
writer << "cg_ctx->set_memory_ptr(" << to_string(deps[3]) << ", "
<< out[0].get_name() << ");\n";
writer << "cg_ctx->mkldnn_invoke_primitive(" << to_string(conv_index) << ");\n";
}
else
{
......
......@@ -155,6 +155,7 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/cpu_visualize_tree.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_mat_mul_transpose.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
......@@ -473,7 +474,10 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
// Build mkldnn primitives for codegen.
pass_manager.register_pass<runtime::cpu::pass::MKLDNNPrimitiveBuildPass>(
*m_mkldnn_emitter, m_node_primitive_idx_map);
m_desc_filename,
*m_mkldnn_emitter,
m_node_primitive_idx_map,
m_node_primitive_string_deps_index_map);
unordered_map<Node*, Node*> node_function_map;
string common_function_string;
......@@ -510,13 +514,17 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
writer +=
R"(
#include <cmath>
#include <fstream>
#include <mkldnn.hpp>
#include "ngraph/distributed.hpp"
#include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/reference/all.hpp"
#include "ngraph/runtime/reference/and.hpp"
#include "ngraph/runtime/reference/any.hpp"
......@@ -668,6 +676,14 @@ using namespace ngraph::runtime;
writer << common_function_string << "\n";
//initiate mkldnn_primitives for CPURuntimeContextCG
writer << "void inline CPURuntimeContextCG::init_mkldnn_primitives()\n";
writer.block_begin();
writer << "mkldnn_primitives = std::vector<mkldnn::primitive*>("
<< to_string(m_mkldnn_emitter->get_mkldnn_primitives_cg().size()) << ");\n";
writer.block_end();
writer << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
auto ordered_ops = function_ordered_ops.at(current_function);
......@@ -722,6 +738,16 @@ using namespace ngraph::runtime;
writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
writer << "{\n";
writer.indent++;
writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
//deserialize and build mkldnn primitives
writer << "if (ctx->first_iteration)\n";
writer.block_begin();
writer << "// read in memory descriptors and build mkldnn primitives\n";
writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
<< ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
<< ");\n";
writer.block_end();
// Execution tracing support
if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
......
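
Taken together, the writer statements above make the generated source initialize its primitive vector and deserialize the memory descriptors once, roughly as follows (a sketch; 42 stands in for the compile-time size of get_mkldnn_primitives_cg(), N for get_mkldnn_descriptors_size(), and some_function for the actual function name):

// --- sketch of the generated output, not verbatim ---
void inline CPURuntimeContextCG::init_mkldnn_primitives()
{
    mkldnn_primitives = std::vector<mkldnn::primitive*>(42);
}

extern "C" void some_function(/* ... */)
{
    std::ifstream desc_file("desc_file", std::ios::binary);
    if (ctx->first_iteration)
    {
        // read in memory descriptors and build mkldnn primitives
        deserialize_memory_descs_and_build_memory_primitives(desc_file, cg_ctx, N);
    }
    // ... per-op emissions follow ...
}
// --- end sketch ---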
......@@ -125,6 +125,18 @@ namespace ngraph
return it->second;
}
// Return the tuple including the string to create mkldnn primitive, the deps and the index in CODEGEN
const std::tuple<std::string, std::vector<size_t>, size_t>&
get_primitive_build_tuple(const Node* node) const
{
auto it = m_node_primitive_string_deps_index_map.find(node);
NGRAPH_CHECK(it != m_node_primitive_string_deps_index_map.end(),
"Primitive build tuple not found for node ",
node->description());
return it->second;
}
size_t add_state(ngraph::State* state)
{
m_states.push_back(state);
......@@ -318,6 +330,11 @@ namespace ngraph
/// Map each node with mkldnn implementation to its mkldnn primitive index.
std::unordered_map<const Node*, size_t> m_node_primitive_idx_map;
/// Map each node with mkldnn implementation to its mkldnn primitive creating string, deps, and mkldnn primitive index.
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>
m_node_primitive_string_deps_index_map;
/// Name of the file to store descriptors for mkldnn_primitives
const std::string m_desc_filename = "desc_file";
};
}
}
......
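
The new map keys each MKLDNN-backed node to a (build string, deps, primitive index) tuple that emit_build_primitives consumes. A tiny, self-contained illustration of that lookup-and-unpack pattern (Node left opaque; not the real class) is:

// --- illustrative sketch, not part of the commit ---
#include <cstddef>
#include <map>
#include <string>
#include <tuple>
#include <vector>

struct Node; // opaque, as in the real header

using BuildTuple = std::tuple<std::string, std::vector<std::size_t>, std::size_t>;

int main()
{
    std::map<const Node*, BuildTuple> build_map;
    const Node* node = nullptr;                    // placeholder key, illustration only
    std::vector<std::size_t> dep_slots = {0, 1, 2};
    build_map[node] = BuildTuple("/* build code */", dep_slots, 3);

    std::string build_string;
    std::vector<std::size_t> deps;
    std::size_t index;
    std::tie(build_string, deps, index) = build_map.at(node); // one lookup, three fields
    return index == 3 ? 0 : 1;
}
// --- end sketch ---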
......@@ -80,6 +80,11 @@ std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives()
return m_mkldnn_primitives;
}
const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives_cg() const
{
return m_mkldnn_primitives_cg;
}
const std::vector<char*>& MKLDNNEmitter::get_mkldnn_workspaces()
{
return m_workspace_bufs;
......@@ -98,6 +103,22 @@ size_t MKLDNNEmitter::insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspa
return (m_workspaces.size() - 1);
}
size_t MKLDNNEmitter::reserve_workspace()
{
m_workspaces_size++;
return m_workspaces_size - 1;
}
void MKLDNNEmitter::reserve_descriptor_space(size_t count)
{
m_mkldnn_descriptors_size += count;
}
size_t MKLDNNEmitter::get_mkldnn_descriptors_size()
{
return m_mkldnn_descriptors_size;
}
size_t MKLDNNEmitter::insert_workspace(std::vector<char*>& mkldnn_workspaces,
std::unique_ptr<MKLDNNWorkspace>& workspace)
{
......@@ -111,6 +132,11 @@ const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
return m_primitive_deps.at(index);
}
const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps_cg(size_t index) const
{
return m_primitive_deps_cg.at(index);
}
std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index)
{
return m_primitive_deps.at(index);
......@@ -401,206 +427,6 @@ size_t MKLDNNEmitter::build_deconvolutionbias_forward(const mkldnn::memory::desc
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
mkldnn::algorithm convolution_algo = mkldnn_utils::get_conv_algo();
size_t conv_index = 0;
try
{
auto conv_prim = new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward_inference,
convolution_algo,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[result_index]);
conv_index = insert_primitive(conv_prim);
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not create mkldnn convolution " + e.message);
}
return conv_index;
}
size_t
MKLDNNEmitter::build_quantized_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
std::vector<float> output_scale;
output_scale.push_back(scale);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
/* Specify the rounding mode */
conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
/* Specify the scales array and corresponding mask */
conv_attr.set_output_scales(0, output_scale);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
return conv_index;
}
size_t
MKLDNNEmitter::build_quantized_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t bias_index = build_memory_primitive(bias_desc);
size_t result_index = build_memory_primitive(result_desc);
std::vector<float> output_scale;
output_scale.push_back(scale);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
/* Specify the rounding mode */
conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
/* Specify the scales array and corresponding mask */
conv_attr.set_output_scales(0, output_scale);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
bias_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops)
{
const size_t input_data_index = build_memory_primitive(input_data_desc);
const size_t weights_index = build_memory_primitive(weights_desc);
const size_t bias_index = build_memory_primitive(bias_desc);
const size_t result_index = build_memory_primitive(result_desc);
mkldnn::primitive_attr conv_attr;
conv_attr.set_post_ops(pops);
mkldnn::algorithm convolution_algo = mkldnn_utils::get_conv_algo();
size_t conv_index = -1;
try
{
conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward_inference,
convolution_algo,
input_data_desc,
weights_desc,
bias_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
conv_attr,
executor::global_cpu_engine},
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(conv_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not create convolution " + e.message);
}
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_backward_weights_bias(
const mkldnn::memory::desc& in_data_desc,
const mkldnn::memory::desc& in_delta_desc,
......@@ -1226,35 +1052,6 @@ void MKLDNNEmitter::build_reorder(std::vector<mkldnn::primitive*>& mkldnn_primit
new mkldnn::reorder(*mkldnn_primitives[input_index], *mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
float beta,
float bias,
int nsize)
{
size_t input_index = build_memory_primitive(input_desc);
size_t result_index = build_memory_primitive(result_desc);
auto lrn_desc = mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring,
mkldnn::algorithm::lrn_across_channels,
input_desc,
nsize,
alpha,
beta,
bias);
auto lrn_prim_desc = mkldnn::lrn_forward::primitive_desc(lrn_desc, executor::global_cpu_engine);
size_t primitive_index = insert_primitive(new mkldnn::lrn_forward(
lrn_prim_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(primitive_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
mkldnn::lrn_forward::desc MKLDNNEmitter::get_lrn_forward_desc(const ngraph::Node* node)
{
const ngraph::op::LRN* lrn = static_cast<const ngraph::op::LRN*>(node);
......@@ -1528,37 +1325,6 @@ void MKLDNNEmitter::build_sigmoid_backward(std::vector<mkldnn::primitive*>& mkld
*mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& inputs_pd)
{
std::vector<mkldnn::memory::primitive::at> inputs_primitive;
size_t input0_data_index = build_memory_primitive(input0_data_desc);
size_t input1_data_index = build_memory_primitive(input1_data_desc);
size_t result_index = build_memory_primitive(result_desc);
inputs_primitive.push_back(*m_mkldnn_primitives[input0_data_index]);
inputs_primitive.push_back(*m_mkldnn_primitives[input1_data_index]);
// elementwise sum primtive descriptor
mkldnn::sum::primitive_desc sum_pd =
mkldnn::sum::primitive_desc(result_desc, scale_vector, inputs_pd);
// sum primitive
size_t add_index = insert_primitive(
new mkldnn::sum(sum_pd, inputs_primitive, *m_mkldnn_primitives[result_index]));
NGRAPH_CHECK(m_primitive_deps.find(add_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[add_index] = {input0_data_index, input1_data_index, result_index};
return add_index;
}
mkldnn::sum::primitive_desc MKLDNNEmitter::get_elementwise_add_desc(const ngraph::Node* node)
{
std::vector<float> scale_vector(2, 1);
......@@ -1602,66 +1368,6 @@ void MKLDNNEmitter::build_elementwise_add(std::vector<mkldnn::primitive*>& mkldn
new mkldnn::sum(sum_pd, inputs_primitive, *mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const double eps,
bool use_global_stats,
bool bn_training_flag,
const mkldnn::post_ops& pops)
{
size_t input_index = build_memory_primitive(input_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
size_t mean_index = build_memory_primitive(mean_desc);
size_t variance_index = build_memory_primitive(variance_desc);
mkldnn::primitive_attr bn_attr;
bn_attr.set_post_ops(pops);
if (bn_training_flag && !use_global_stats)
{
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_forward(
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
bn_attr,
executor::global_cpu_engine},
mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]),
*m_mkldnn_primitives[mean_index],
*m_mkldnn_primitives[variance_index]));
m_primitive_deps[batchnorm_index] = {
input_index, weights_index, result_index, mean_index, variance_index};
return batchnorm_index;
}
else
{
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_forward(
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift |
mkldnn::batch_normalization_flag::use_global_stats},
bn_attr,
executor::global_cpu_engine},
mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[mean_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[variance_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index])));
m_primitive_deps[batchnorm_index] = {
input_index, mean_index, variance_index, weights_index, result_index};
return batchnorm_index;
}
}
void MKLDNNEmitter::build_batchnorm_forward(
std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
......@@ -1720,56 +1426,6 @@ void MKLDNNEmitter::build_batchnorm_forward(
}
}
size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& dinput_desc,
const mkldnn::memory::desc& dweights_desc,
const double eps)
{
size_t weights_index = build_memory_primitive(weights_desc);
size_t input_index = build_memory_primitive(input_desc);
size_t mean_index = build_memory_primitive(mean_desc);
size_t variance_index = build_memory_primitive(variance_desc);
size_t delta_index = build_memory_primitive(delta_desc);
size_t dinput_index = build_memory_primitive(dinput_desc);
size_t dweights_index = build_memory_primitive(dweights_desc);
size_t batchnorm_index = insert_primitive(new mkldnn::batch_normalization_backward(
{{mkldnn::prop_kind::backward,
delta_desc,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
executor::global_cpu_engine,
{{mkldnn::prop_kind::forward_training,
input_desc,
eps,
mkldnn::batch_normalization_flag::use_scale_shift},
executor::global_cpu_engine}},
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[mean_index],
*m_mkldnn_primitives[variance_index],
*m_mkldnn_primitives[delta_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[dinput_index],
*m_mkldnn_primitives[dweights_index]));
NGRAPH_CHECK(m_primitive_deps.find(batchnorm_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[batchnorm_index] = {weights_index,
input_index,
mean_index,
variance_index,
delta_index,
dinput_index,
dweights_index};
return batchnorm_index;
}
mkldnn::batch_normalization_backward::desc
MKLDNNEmitter::get_batchnorm_backward_desc(const ngraph::Node* node)
{
......@@ -1828,72 +1484,6 @@ void MKLDNNEmitter::build_batchnorm_backward(
*mkldnn_primitives[dweights_index]);
}
size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc,
const mkldnn::memory::desc& weights_iter_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& dst_layer_desc,
const mkldnn::memory::desc& dst_iter_desc,
const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm)
{
size_t src_layer_index = build_memory_primitive(src_layer_desc);
size_t src_iter_index = build_memory_primitive(src_iter_desc);
size_t weights_layer_index = build_memory_primitive(weights_layer_desc);
size_t weights_iter_index = build_memory_primitive(weights_iter_desc);
size_t bias_index = build_memory_primitive(bias_desc);
size_t dst_layer_index = build_memory_primitive(dst_layer_desc);
size_t dst_iter_index = build_memory_primitive(dst_iter_desc);
mkldnn::rnn_cell::desc rnn_cell(rnn_algorithm);
mkldnn::rnn_forward::desc rnn_layer_desc(mkldnn::prop_kind::forward_training,
rnn_cell,
rnn_direction,
src_layer_desc,
src_iter_desc,
weights_layer_desc,
weights_iter_desc,
bias_desc,
dst_layer_desc,
dst_iter_desc);
auto rnn_layer_prim_desc =
mkldnn::rnn_forward::primitive_desc(rnn_layer_desc, executor::global_cpu_engine);
auto workspace_index =
build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc());
auto workspace = std::unique_ptr<MKLDNNWorkspace>(
new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
auto workspace_buf_index = insert_workspace(workspace);
size_t rnn_index = insert_primitive(new mkldnn::rnn_forward(
rnn_layer_prim_desc,
mkldnn::primitive::at(*m_mkldnn_primitives[src_layer_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[src_iter_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_layer_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[weights_iter_index]),
mkldnn::primitive::at(*m_mkldnn_primitives[bias_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_layer_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_iter_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[workspace_index])));
NGRAPH_CHECK(m_primitive_deps.find(rnn_index) == m_primitive_deps.end(),
"Dependencies already created for node");
m_primitive_deps[rnn_index] = {src_layer_index,
src_iter_index,
weights_layer_index,
weights_iter_index,
bias_index,
dst_layer_index,
dst_iter_index,
workspace_index,
workspace_buf_index};
return rnn_index;
}
void MKLDNNEmitter::build_rnn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
std::vector<char*>& mkldnn_workspaces,
const mkldnn::rnn_forward::desc& rnn_desc,
......@@ -2282,6 +1872,21 @@ size_t MKLDNNEmitter::reserve_primitive_space(size_t count, bool new_workspace)
return m_mkldnn_primitives.size() - 1;
}
size_t MKLDNNEmitter::reserve_primitive_space_cg(size_t count, bool new_workspace)
{
size_t size = m_mkldnn_primitives_cg.size();
m_mkldnn_primitives_cg.resize(size + count, nullptr);
for (auto i = 0; i < count - 1; i++)
{
m_primitive_deps_cg[m_mkldnn_primitives_cg.size() - 1].push_back(size + i);
}
if (new_workspace)
{
m_primitive_deps_cg[m_mkldnn_primitives_cg.size() - 1].push_back(0);
}
return m_mkldnn_primitives_cg.size() - 1;
}
size_t MKLDNNEmitter::build_quantized_inner_product_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
......
......@@ -127,17 +127,24 @@ namespace ngraph
MKLDNNEmitter() {}
~MKLDNNEmitter();
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives_cg() const;
std::vector<mkldnn::primitive*>& get_mkldnn_primitives();
const std::vector<char*>& get_mkldnn_workspaces();
// reserve the space for primitives for each op, different op requires different number of primitives.
// some ops require a new workspace.
size_t reserve_primitive_space(size_t count, bool new_workspace = false);
size_t reserve_primitive_space_cg(size_t count, bool new_workspace = false);
size_t insert_primitive(mkldnn::primitive* primitive);
size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace);
size_t insert_workspace(std::vector<char*>& mkldnn_workspaces,
std::unique_ptr<MKLDNNWorkspace>& workspace);
const std::vector<size_t>& get_primitive_deps(size_t index) const;
const std::vector<size_t>& get_primitive_deps_cg(size_t index) const;
size_t reserve_workspace();
void reserve_descriptor_space(size_t count);
size_t get_mkldnn_descriptors_size();
std::vector<size_t>& get_primitive_deps(size_t index);
// TODO(jmenon): Get rid of TensorViewWrappers at some point
......@@ -156,51 +163,6 @@ namespace ngraph
const mkldnn::memory::desc& desc,
size_t index);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops = mkldnn::post_ops());
/**
* Convolution + bias forward
*/
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_quantized_inner_product_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
......@@ -216,121 +178,6 @@ namespace ngraph
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
template <typename OpTy>
size_t build_convolution(const ngraph::Node* node)
{
// For dilation, MKLDNN wants to know how many elements to insert between, not
// how far apart to space the elements like nGraph. So we have to subtract 1
// from each pos.
Strides window_dilation_strides_adjusted;
auto* convolution = static_cast<const OpTy*>(node);
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
mkldnn::post_ops ops;
if (std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::ConvolutionAdd>())
{
ops.append_sum(1.f);
}
if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node, 5);
ops.append_sum(sum_scale_val[0]);
}
if (has_relu<OpTy>(node))
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
if (std::is_same<OpTy, ngraph::op::ConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>())
{
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_convolution_forward(data_desc,
weights_desc,
bias_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
ops);
}
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolution>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionRelu>())
{
auto scale_val = extract_scale_value<OpTy>(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale_val[0],
ops);
}
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
int index =
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ? 3 : 4;
auto scale_val = extract_scale_value<OpTy>(node, index);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
bias_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale_val[0],
ops);
}
else
{
return build_convolution_forward(data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
ops);
}
}
void build_deconvolutionbias_forward(
std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::deconvolution_forward::desc& fwd_desc,
......@@ -466,31 +313,6 @@ namespace ngraph
}
}
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
mkldnn::memory::format query_convolution_forward_weight_format(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc_any,
......@@ -809,13 +631,6 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t reorder_index);
size_t build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
float beta,
float bias,
int nsize);
mkldnn::lrn_forward::desc get_lrn_forward_desc(const ngraph::Node* node);
void build_lrn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
......@@ -868,79 +683,12 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t sigmoid_index);
size_t build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& input_pd);
mkldnn::sum::primitive_desc get_elementwise_add_desc(const ngraph::Node* node);
void build_elementwise_add(std::vector<mkldnn::primitive*>& mkldnn_primitives,
const mkldnn::sum::primitive_desc& sum_pd,
const std::vector<size_t>& deps,
size_t add_index);
template <typename OpTy>
size_t build_batch_norm_primitive(const Node* node,
const bool append_relu,
const bool training)
{
const auto& args = node->get_inputs();
mkldnn::post_ops ops;
if (append_relu)
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
bool use_global_stats;
const mkldnn::memory::desc *mean_desc, *variance_desc;
if (training && args.size() == 3)
{
mean_desc = &mkldnn_utils::get_output_mkldnn_md(node, 1);
variance_desc = &mkldnn_utils::get_output_mkldnn_md(node, 2);
use_global_stats = false;
}
else
{
mean_desc = &mkldnn_utils::get_input_mkldnn_md(node, 3);
variance_desc = &mkldnn_utils::get_input_mkldnn_md(node, 4);
use_global_stats = true;
}
const OpTy* batchnorm = static_cast<const OpTy*>(node);
return build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
*mean_desc,
*variance_desc,
batchnorm->get_eps_value(),
use_global_stats,
training,
ops);
}
size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const double eps,
bool use_global_stats,
bool bn_training_flag,
const mkldnn::post_ops& pops = mkldnn::post_ops());
template <typename OP>
mkldnn::batch_normalization_forward::desc
......@@ -979,15 +727,6 @@ namespace ngraph
size_t batchnorm_index,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
const mkldnn::memory::desc& variance_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& dinput_desc,
const mkldnn::memory::desc& dweights_desc,
const double eps);
mkldnn::batch_normalization_backward::desc
get_batchnorm_backward_desc(const ngraph::Node* node);
......@@ -999,118 +738,6 @@ namespace ngraph
const std::vector<size_t>& deps,
size_t batchnorm_index);
template <typename OP>
size_t build_rnn(const ngraph::Node* node)
{
const auto& out = node->get_outputs();
const auto& args = node->get_inputs();
auto rnn_node = static_cast<const OP*>(node);
auto src_sequence_length_max =
static_cast<unsigned long>(rnn_node->get_src_sequence_length());
auto direction = static_cast<unsigned long>(rnn_node->get_direction());
auto num_fused_layers =
static_cast<unsigned long>(rnn_node->get_num_fused_layers());
auto feature_size =
static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
auto rnn_cell_n_gates =
static_cast<unsigned long>(rnn_node->get_gates_per_cell());
auto rnn_cell_n_states =
static_cast<unsigned long>(rnn_node->get_num_cell_states());
auto get_mkldnn_rnn_cell_type = [&]() {
switch (rnn_node->get_rnn_type())
{
case rnn_utils::rnntype::vanilla_rnn: return mkldnn::algorithm::vanilla_rnn;
case rnn_utils::rnntype::vanilla_gru: return mkldnn::algorithm::vanilla_gru;
case rnn_utils::rnntype::vanilla_lstm:
return mkldnn::algorithm::vanilla_lstm;
default: throw ngraph_error("unsupported mkldnn rnn algorithm");
}
};
auto get_mkldnn_rnn_direction = [&]() {
switch (direction)
{
case 1: return mkldnn::rnn_direction::unidirectional_left2right;
case 2: return mkldnn::rnn_direction::bidirectional_concat;
default: throw ngraph_error("unsupported mkldnn rnn direction");
}
};
if (out[0].get_shape().size() == 2 &&
(out[0].get_shape()[1] != direction * feature_size))
{
throw ngraph_error(
"input slc{ht} feature size is not equal to output dlc{ht} feature "
"size ");
}
if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
rnn_node->get_num_timesteps() != 1)
{
throw ngraph_error(
"input sic{ht_1|ct_1} feature size is not equal to output "
"dlc{ht_1|ct_1} "
"feature size ");
}
Shape src_layer_tz{
src_sequence_length_max,
batch,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
Shape src_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
Shape wei_layer_tz{
num_fused_layers,
direction,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
rnn_cell_n_gates,
feature_size};
Shape wei_iter_tz{
num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
Shape dst_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
// We create the memory descriptors used by the user
auto src_layer_md = build_memory_descriptor(
src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
auto src_iter_md = build_memory_descriptor(
src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
auto wei_layer_md = build_memory_descriptor(
wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
auto wei_iter_md = build_memory_descriptor(
wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
auto bias_md = build_memory_descriptor(
bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
auto dst_layer_md = build_memory_descriptor(
dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
auto dst_iter_md = build_memory_descriptor(
dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);
return build_rnn_forward(src_layer_md,
src_iter_md,
wei_layer_md,
wei_iter_md,
bias_md,
dst_layer_md,
dst_iter_md,
get_mkldnn_rnn_direction(),
get_mkldnn_rnn_cell_type());
}
size_t build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc,
const mkldnn::memory::desc& weights_iter_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& dst_layer_desc,
const mkldnn::memory::desc& dst_iter_desc,
const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm);
void build_rnn_forward(std::vector<mkldnn::primitive*>& mkldnn_primitives,
std::vector<char*>& mkldnn_workspaces,
const mkldnn::rnn_forward::desc& desc,
......@@ -1886,10 +1513,14 @@ namespace ngraph
private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::primitive*> m_mkldnn_primitives_cg;
std::vector<mkldnn::stream> m_mkldnn_streams;
std::unordered_map<size_t, std::vector<size_t>> m_primitive_deps;
std::unordered_map<size_t, std::vector<size_t>> m_primitive_deps_cg;
std::vector<std::unique_ptr<MKLDNNWorkspace>> m_workspaces;
std::vector<char*> m_workspace_bufs;
size_t m_workspaces_size = 0;
size_t m_mkldnn_descriptors_size = 0;
};
}
}
......
......@@ -14,8 +14,11 @@
// limitations under the License.
//*****************************************************************************
#include <string>
#include "cpu_mkldnn_primitive_build.hpp"
#include "ngraph/code_writer.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
......@@ -40,6 +43,7 @@
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
......@@ -65,70 +69,499 @@ namespace ngraph
{
namespace pass
{
// serialize memory descriptors
static void serialize_memory_descs(std::ofstream& desc_file,
std::vector<mkldnn::memory::desc>& descs,
size_t primitive_index)
{
for (size_t i = 0; i < descs.size(); i++)
{
desc_file << primitive_index;
desc_file.write(reinterpret_cast<char*>(&descs[i]),
sizeof(mkldnn::memory::desc));
primitive_index++;
}
}
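The on-disk layout this helper produces is easiest to see in isolation: for each descriptor it writes a formatted primitive index immediately followed by the raw descriptor bytes. A toy, self-contained sketch of that record layout (FakeDesc is a hypothetical stand-in for mkldnn::memory::desc):
#include <cstddef>
#include <fstream>
struct FakeDesc
{
    int dims[4];
    int data_type;
};
int main()
{
    std::ofstream desc_file("cg_descs.bin", std::ios::binary);
    FakeDesc descs[2] = {{{1, 3, 4, 4}, 1}, {{1, 3, 4, 4}, 1}};
    size_t primitive_index = 7; // deps[0] of the op being emitted
    for (auto& d : descs)
    {
        desc_file << primitive_index;                                   // formatted slot index
        desc_file.write(reinterpret_cast<const char*>(&d), sizeof(d));  // raw descriptor bytes
        primitive_index++;
    }
    return 0;
}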
// The following functions build the MKLDNN primitive, or construct the primitive build string for codegen, for each type of nGraph Node.
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Add)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Add)
{
std::vector<float> scale_vector(2, 1);
std::vector<mkldnn::memory::primitive_desc> inputs_pd;
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, executor::global_cpu_engine));
return mkldnn_emitter.build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
// Add needs 4 primitives: input0, input1, result, and sum.
index = mkldnn_emitter.reserve_primitive_space_cg(4);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {
input0_data_desc, input1_data_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "std::vector<float> scale_vector(2, 1);\n";
writer << "std::vector<mkldnn::memory::primitive_desc> inputs_pd;\n";
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], "
"cg_ctx->global_cpu_engine));\n";
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], "
"cg_ctx->global_cpu_engine));\n";
// elementwise sum primitive descriptor
writer << "mkldnn::sum::primitive_desc sum_pd = "
"mkldnn::sum::primitive_desc(*cg_ctx->mkldnn_descriptors["
<< desc_index + 2 << "], "
"scale_vector, inputs_pd);\n";
writer << "\n// build sum primitive\n";
writer << "std::vector<mkldnn::memory::primitive::at> inputs_primitive;\n";
//emit_memory_primitive_build(writer, desc_names, deps);
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "]);\n";
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "]);\n";
// sum primitive
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::sum(sum_pd, inputs_primitive, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]);\n";
construct_string = writer.get_code();
}
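At runtime the emitted Add string amounts to building an MKL-DNN sum primitive over the two inputs. A standalone sketch of that, written against the MKL-DNN v0.x API used throughout this file (shapes, engine construction, and the eager stream are illustrative; the generated code instead pulls descriptors and memory slots out of cg_ctx):
#include <mkldnn.hpp>
#include <vector>
int main()
{
    mkldnn::engine cpu_engine(mkldnn::engine::kind::cpu, 0);
    mkldnn::memory::desc md({2, 4}, mkldnn::memory::data_type::f32, mkldnn::memory::format::nc);
    mkldnn::memory::primitive_desc mpd(md, cpu_engine);
    std::vector<float> in0(8, 1.f), in1(8, 2.f), out(8, 0.f);
    mkldnn::memory src0(mpd, in0.data());
    mkldnn::memory src1(mpd, in1.data());
    mkldnn::memory dst(mpd, out.data());
    // scale_vector and inputs_pd mirror the generated string above
    std::vector<float> scale_vector(2, 1);
    std::vector<mkldnn::memory::primitive_desc> inputs_pd{mpd, mpd};
    mkldnn::sum::primitive_desc sum_pd(md, scale_vector, inputs_pd);
    std::vector<mkldnn::primitive::at> inputs_primitive{src0, src1};
    mkldnn::sum sum_prim(sum_pd, inputs_primitive, dst);
    mkldnn::stream(mkldnn::stream::kind::eager).submit({sum_prim}).wait();
    return 0; // out now holds 3.f in every element
}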
template <typename OP>
void construct_primitive_build_string_rnn(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
const auto& out = node->get_outputs();
const auto& args = node->get_inputs();
auto rnn_node = static_cast<const OP*>(node);
auto src_sequence_length_max =
static_cast<unsigned long>(rnn_node->get_src_sequence_length());
auto direction = static_cast<unsigned long>(rnn_node->get_direction());
auto num_fused_layers =
static_cast<unsigned long>(rnn_node->get_num_fused_layers());
auto feature_size =
static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
auto rnn_cell_n_gates =
static_cast<unsigned long>(rnn_node->get_gates_per_cell());
auto rnn_cell_n_states =
static_cast<unsigned long>(rnn_node->get_num_cell_states());
auto get_mkldnn_rnn_cell_type = [&]() {
switch (rnn_node->get_rnn_type())
{
case rnn_utils::rnntype::vanilla_rnn:
return std::string("mkldnn::algorithm::vanilla_rnn");
case rnn_utils::rnntype::vanilla_gru:
return std::string("mkldnn::algorithm::vanilla_gru");
case rnn_utils::rnntype::vanilla_lstm:
return std::string("mkldnn::algorithm::vanilla_lstm");
default: throw ngraph_error("unsupported mkldnn rnn algorithm");
}
};
auto get_mkldnn_rnn_direction = [&]() {
switch (direction)
{
case 1:
return std::string("mkldnn::rnn_direction::unidirectional_left2right");
case 2: return std::string("mkldnn::rnn_direction::bidirectional_concat");
default: throw ngraph_error("unsupported mkldnn rnn direction");
}
};
if (out[0].get_shape().size() == 2 &&
(out[0].get_shape()[1] != direction * feature_size))
{
throw ngraph_error(
"input slc{ht} feature size is not equal to output dlc{ht} feature "
"size ");
}
if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
rnn_node->get_num_timesteps() != 1)
{
throw ngraph_error(
"input sic{ht_1|ct_1} feature size is not equal to output "
"dlc{ht_1|ct_1} "
"feature size ");
}
Shape src_layer_tz{
src_sequence_length_max,
batch,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
Shape src_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
Shape wei_layer_tz{
num_fused_layers,
direction,
static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
rnn_cell_n_gates,
feature_size};
Shape wei_iter_tz{
num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
Shape dst_iter_tz{
num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
// We create the memory descriptors used by the user
auto src_layer_md = mkldnn_emitter.build_memory_descriptor(
src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
auto src_iter_md = mkldnn_emitter.build_memory_descriptor(
src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
auto wei_layer_md = mkldnn_emitter.build_memory_descriptor(
wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
auto wei_iter_md = mkldnn_emitter.build_memory_descriptor(
wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
auto bias_md = mkldnn_emitter.build_memory_descriptor(
bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
auto dst_layer_md = mkldnn_emitter.build_memory_descriptor(
dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
auto dst_iter_md = mkldnn_emitter.build_memory_descriptor(
dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);
// Lstm/Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
index = mkldnn_emitter.reserve_primitive_space_cg(9, true /* new workspace */);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {src_layer_md,
src_iter_md,
wei_layer_md,
wei_iter_md,
bias_md,
dst_layer_md,
dst_iter_md};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "mkldnn::rnn_cell::desc rnn_cell_desc(" << get_mkldnn_rnn_cell_type()
<< ");\n";
writer << "\n// build lstm/rnn primitive descriptor\n";
writer << "auto rnn_desc = "
"mkldnn::rnn_forward::desc(mkldnn::prop_kind::forward_training, "
"rnn_cell_desc, "
<< get_mkldnn_rnn_direction() << ", "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 2 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 3 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 4 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 5 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 6 << "]);\n";
writer << "auto rnn_prim_desc = mkldnn::rnn_forward::primitive_desc(rnn_desc, "
"cg_ctx->global_cpu_engine);\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(deps[7])
<< "] = new "
"mkldnn::memory({rnn_prim_desc.workspace_primitive_desc().desc(), "
"cg_ctx->global_cpu_engine}, nullptr);\n";
writer << "auto workspace = "
"(char*)malloc(rnn_prim_desc.workspace_primitive_desc().get_size());"
"\n";
writer << "if (!workspace)\n";
writer.block_begin();
writer << "throw std::bad_alloc();\n";
writer.block_end();
writer << "cg_ctx->mkldnn_workspaces.push_back(workspace);\n";
deps[8] = mkldnn_emitter.reserve_workspace();
writer << "\n// build lstm/rnn primitive\n";
// lstm/rnn primitive
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::rnn_forward(rnn_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[5])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[6])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[7]) << "]));\n";
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Lstm)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Lstm)
{
return mkldnn_emitter.build_rnn<Lstm>(node);
construct_primitive_build_string_rnn<Lstm>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Rnn)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Rnn)
{
return mkldnn_emitter.build_rnn<Rnn>(node);
construct_primitive_build_string_rnn<Rnn>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <typename OP>
void construct_primitive_build_string_batchnorm(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file,
const bool append_relu,
const bool training)
{
const auto& args = node->get_inputs();
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
index = mkldnn_emitter.reserve_primitive_space_cg(6);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
if (append_relu)
{
writer << "mkldnn::post_ops pops;\n";
writer << "const float ops_scale = 1.f;\n";
writer << "const float ops_alpha = -0.f; // relu negative slope\n";
writer << "const float ops_beta = 0.f;\n";
writer << "pops.append_eltwise("
"ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, "
"ops_beta);\n";
}
else
{
writer << "mkldnn::post_ops pops = mkldnn::post_ops();\n";
}
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
bool use_global_stats;
const mkldnn::memory::desc *mean_desc, *variance_desc;
if (training && args.size() == 3)
{
mean_desc = &mkldnn_utils::get_output_mkldnn_md(node, 1);
variance_desc = &mkldnn_utils::get_output_mkldnn_md(node, 2);
use_global_stats = false;
}
else
{
mean_desc = &mkldnn_utils::get_input_mkldnn_md(node, 3);
variance_desc = &mkldnn_utils::get_input_mkldnn_md(node, 4);
use_global_stats = true;
}
auto batchnorm = static_cast<const OP*>(node);
auto eps = batchnorm->get_eps_value();
writer << "mkldnn::primitive_attr bn_attr;\n";
writer << "bn_attr.set_post_ops(pops);\n";
writer << "\n// build batchnorm primitive descriptor\n";
if (use_global_stats)
{
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {
input_desc, *mean_desc, *variance_desc, weights_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "auto batchnorm_desc = "
"mkldnn::batch_normalization_forward::desc(mkldnn::prop_kind::"
"forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift | "
"mkldnn::batch_normalization_flag::use_global_stats);\n";
writer << "auto batchnorm_prim_desc = "
"mkldnn::batch_normalization_forward::primitive_desc(batchnorm_"
"desc, "
"bn_attr, cg_ctx->global_cpu_engine);\n";
writer << "\n// build batchnorm primitive\n";
// batchnorm primitive
writer
<< "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_forward(batchnorm_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "]));\n";
}
else
{
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {
input_desc, weights_desc, result_desc, *mean_desc, *variance_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "auto batchnorm_desc = "
"mkldnn::batch_normalization_forward::desc(mkldnn::prop_kind::"
"forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift);\n";
writer << "auto batchnorm_prim_desc = "
"mkldnn::batch_normalization_forward::primitive_desc(batchnorm_"
"desc, "
"bn_attr, cg_ctx->global_cpu_engine);\n";
writer << "\n// build batchnorm primitive\n";
// batchnorm primitive
writer
<< "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_forward(batchnorm_prim_desc, "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0])
<< "]), "
"mkldnn::primitive::at(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1])
<< "]), "
"static_cast<mkldnn::memory>(*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]), "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "]);\n";
}
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTraining)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTraining)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInference>(
node, false /*Append relu*/, true /*Training*/);
construct_primitive_build_string_batchnorm<BatchNormTraining>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
false /*Append relu*/,
true /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInference)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormInference)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInference>(
node, false /*Append relu*/, false /*Training*/);
construct_primitive_build_string_batchnorm<BatchNormInference>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
false /*Append relu*/,
false /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingRelu)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTrainingRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormTrainingRelu>(
node, true /*Append relu*/, true /*Training*/);
construct_primitive_build_string_batchnorm<BatchNormTrainingRelu>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
true /*Append relu*/,
true /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInferenceRelu)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormInferenceRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInferenceRelu>(
node, true /*Append relu*/, false /*Training*/);
construct_primitive_build_string_batchnorm<BatchNormInferenceRelu>(
mkldnn_emitter,
node,
construct_string,
deps,
index,
desc_file,
true /*Append relu*/,
false /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingBackprop)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
BatchNormTrainingBackprop)
{
const auto& args = node->get_inputs();
const auto* batchnorm = static_cast<const BatchNormTrainingBackprop*>(node);
auto eps = batchnorm->get_eps_value();
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto weights_desc = mkldnn_emitter.build_memory_descriptor(
......@@ -141,47 +574,162 @@ namespace ngraph
auto dweights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
const auto* batchnorm = static_cast<const BatchNormTrainingBackprop*>(node);
return mkldnn_emitter.build_batchnorm_backward(weights_desc,
// batchnorm backward needs 8 primitives: weights, input, mean, variance, delta,
// dinput, dweights, and batch_normalization_backward.
index = mkldnn_emitter.reserve_primitive_space_cg(8);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {weights_desc,
input_desc,
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
dweights_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "\n// build batchnorm primitives\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::batch_normalization_backward("
<< "{{mkldnn::prop_kind::backward, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 4 << "], "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift}, "
"cg_ctx->global_cpu_engine, "
"{{mkldnn::prop_kind::forward_training, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "], " << eps
<< ", "
"mkldnn::batch_normalization_flag::use_scale_shift}, "
"cg_ctx->global_cpu_engine}}, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[4]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[5]) << "], "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[6]) << "]);\n";
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Concat)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Concat)
{
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0, end = node->get_inputs().size(); i < end; i++)
auto concat = static_cast<const ngraph::op::Concat*>(node);
size_t concat_dim = concat->get_concatenation_axis();
size_t nargs = node->get_inputs().size();
// Concat needs the number of inputs plus 2 primitives; the extra two are for the result and the concat primitive.
index = mkldnn_emitter.reserve_primitive_space_cg(nargs + 2);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs;
for (size_t i = 0; i < nargs; i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
descs.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_dim =
(static_cast<const Concat*>(node))->get_concatenation_axis();
return mkldnn_emitter.build_concat(inputs_data_desc, result_desc, concat_dim);
descs.push_back(result_desc);
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "std::vector<mkldnn::memory::primitive::at> inputs_primitive;\n";
writer << "std::vector<mkldnn::memory::primitive_desc> inputs_pd;\n";
writer << "for (size_t i = " << desc_index << "; i < " << desc_index + nargs
<< "; i++)\n";
writer.block_begin();
writer << "inputs_pd.push_back(mkldnn::memory::primitive_desc("
"*cg_ctx->mkldnn_descriptors[i], "
"cg_ctx->global_cpu_engine));\n";
writer.block_end();
writer << "for (size_t i = " << deps[0] << "; i < " << deps[0] + nargs
<< "; i++)\n";
writer.block_begin();
writer << "inputs_primitive.push_back(*cg_ctx->mkldnn_primitives[i]);\n";
writer.block_end();
writer << "auto concat_prim_desc = "
"mkldnn::concat::primitive_desc( "
"*cg_ctx->mkldnn_descriptors["
<< desc_index + nargs << "], "
<< std::to_string(static_cast<int>(concat_dim)) << ", inputs_pd);\n";
writer << "\n// build concat primitive\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::concat(concat_prim_desc, inputs_primitive, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[nargs]) << "]);\n";
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(LRN)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(LRN)
{
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
// LRN needs 3 primitives: input, result, and lrn_forward.
index = mkldnn_emitter.reserve_primitive_space_cg(3);
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
// Write memory descriptors to file
std::vector<mkldnn::memory::desc> descs = {input_desc, result_desc};
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
const auto* lrn = static_cast<const LRN*>(node);
auto alpha = static_cast<float>(lrn->get_alpha());
auto beta = static_cast<float>(lrn->get_beta());
auto bias = static_cast<float>(lrn->get_bias());
auto nsize = static_cast<int>(lrn->get_nsize());
return mkldnn_emitter.build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
writer << "auto lrn_desc = "
"mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, "
"mkldnn::algorithm::lrn_across_channels, "
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "], " << nsize << ", " << alpha << ", " << beta << ", "
<< bias << ");\n";
writer << "auto lrn_prim_desc = "
"mkldnn::lrn_forward::primitive_desc(lrn_desc, "
"cg_ctx->global_cpu_engine);\n";
writer << "\n// build lrn primitive\n";
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = new mkldnn::lrn_forward(lrn_prim_desc, "
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "]"
", *cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "]);\n";
construct_string = writer.get_code();
}
template <>
......@@ -198,96 +746,313 @@ namespace ngraph
input_desc, result_desc, lower_bounds, out_shape);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionRelu)
template <typename OP>
void construct_primitive_build_string_conv(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
return mkldnn_emitter.build_convolution<ConvolutionRelu>(node);
auto convolution = static_cast<const OP*>(node);
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionRelu)
auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto strides = convolution->get_window_movement_strides();
auto pad_below = convolution->get_padding_below();
auto pad_above = convolution->get_padding_above();
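// Conv needs 4 primitives: data, weights, result, and conv_forward, plus one more when a bias input is present.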
if (mkldnn_emitter.has_bias<OP>())
{
index = mkldnn_emitter.reserve_primitive_space_cg(5);
}
else
{
index = mkldnn_emitter.reserve_primitive_space_cg(4);
}
deps = mkldnn_emitter.get_primitive_deps_cg(index);
CodeWriter writer;
writer << "// Write in memory descriptors\n";
std::vector<mkldnn::memory::desc> descs = {
data_desc, weights_desc, result_desc};
if (mkldnn_emitter.has_bias<OP>())
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionRelu>(node);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
descs.insert(descs.begin() + 2, bias_desc);
}
auto desc_index = mkldnn_emitter.get_mkldnn_descriptors_size();
mkldnn_emitter.reserve_descriptor_space(descs.size());
serialize_memory_descs(desc_file, descs, deps[0]);
writer << "\n// build QConv primitive descriptor\n";
writer << "auto conv_desc = "
"mkldnn::convolution_forward::desc(mkldnn::prop_kind::forward,\n"
"mkldnn::algorithm::convolution_direct,\n"
"*cg_ctx->mkldnn_descriptors["
<< desc_index << "],\n"
"*cg_ctx->mkldnn_descriptors["
<< desc_index + 1 << "],\n";
if (mkldnn_emitter.has_bias<OP>())
{
writer << "*cg_ctx->mkldnn_descriptors[" << desc_index + 2 << "],\n";
}
writer << "*cg_ctx->mkldnn_descriptors[" << desc_index + (descs.size() - 1)
<< "],\n"
"mkldnn::memory::dims{"
<< std::to_string(strides[0]) << ", " << std::to_string(strides[1]);
if (strides.size() == 3)
{
writer << ", " << std::to_string(strides[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(window_dilation_strides_adjusted[0]) << ", "
<< std::to_string(window_dilation_strides_adjusted[1]);
if (window_dilation_strides_adjusted.size() == 3)
{
writer << ", " << std::to_string(window_dilation_strides_adjusted[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(pad_below[0]) << ", " << std::to_string(pad_below[1]);
if (pad_below.size() == 3)
{
writer << ", " << std::to_string(pad_below[2]);
}
writer << "},\n"
"mkldnn::memory::dims{"
<< std::to_string(pad_above[0]) << ", " << std::to_string(pad_above[1]);
if (pad_above.size() == 3)
{
writer << ", " << std::to_string(pad_above[2]);
}
writer << "},\n"
"mkldnn::padding_kind::zero);\n";
writer << "mkldnn::post_ops ops;\n";
if (std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionAdd>())
{
writer << "ops.append_sum(1.f);\n";
}
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scales_size = shape_size(convolution->get_input_shape(5));
const element::Type& et = node->get_input_element_type(5);
std::string type = et.c_type_string();
std::stringstream ss;
writer << "std::vector<float> dyn_post_op_scales;\n";
auto c = std::dynamic_pointer_cast<ngraph::op::Constant>(
node->get_arguments()[5]);
if (c)
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node,
5);
writer << "dyn_post_op_scales.push_back("
<< std::to_string(sum_scale_val[0]) << ");\n";
}
else
{
ss << "((" << type << "*)(pool_base_ptr + "
<< node->get_inputs()[5].get_tensor().get_pool_offset() << "))";
writer << "dyn_post_op_scales.assign(" << ss.str() << ", " << ss.str()
<< " + " << std::to_string(sum_scales_size) << ");\n";
}
writer << "ops.append_sum(dyn_post_op_scales[0]);\n";
}
if (has_relu<OP>(node))
{
writer << "const float ops_scale = 1.f;\n";
writer << "const float ops_alpha = -0.f; // relu negative slope\n";
writer << "const float ops_beta = 0.f;\n";
writer << "ops.append_eltwise("
"ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, "
"ops_beta);\n";
}
writer << "mkldnn::primitive_attr conv_attr;\n";
writer << "conv_attr.set_post_ops(ops);\n";
if (mkldnn_emitter.is_quantized_conv<OP>())
{
auto scale_index = mkldnn_emitter.get_scale_index<OP>();
auto c = std::dynamic_pointer_cast<ngraph::op::Constant>(
node->get_arguments()[scale_index]);
auto scales_size = shape_size(convolution->get_input_shape(scale_index));
const element::Type& et = node->get_input_element_type(scale_index);
std::string type = et.c_type_string();
std::stringstream ss;
if (c)
{
ss << "((" << type << "*)(" << c->get_data_ptr() << "))\n";
}
else
{
ss << "((" << type << "*)(pool_base_ptr + "
<< node->get_inputs()[scale_index].get_tensor().get_pool_offset()
<< "))";
}
writer << "std::vector<float> dyn_scales;\n";
writer << "dyn_scales.assign(" << ss.str() << ", " << ss.str() << " + "
<< std::to_string(scales_size) << ");\n";
writer << "// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a "
"vector \n";
writer << "const int mask = " << std::to_string(scales_size)
<< " == 1 ? 0 : 2;\n";
writer << "conv_attr.set_int_output_round_mode(mkldnn::round_mode::round_"
"nearest);\n";
writer << "conv_attr.set_output_scales(mask, dyn_scales);\n";
}
//emit_memory_primitive_build(writer, desc_names, deps);
writer << "mkldnn::primitive* prim;\n";
if (mkldnn_emitter.has_bias<OP>())
{
writer << "prim = new mkldnn::convolution_forward({conv_desc, conv_attr, "
"cg_ctx->global_cpu_engine},"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[3]) << "]);\n";
}
else
{
writer << "prim = new mkldnn::convolution_forward({conv_desc, conv_attr, "
"cg_ctx->global_cpu_engine},"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[0]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[1]) << "],\n"
"*cg_ctx->mkldnn_primitives["
<< std::to_string(deps[2]) << "]);\n";
}
writer << "cg_ctx->mkldnn_primitives[" << std::to_string(index)
<< "] = prim;\n";
construct_string = writer.get_code();
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolution)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(Convolution)
{
return mkldnn_emitter.build_convolution<QuantizedConvolution>(node);
construct_primitive_build_string_conv<Convolution>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolution)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolution)
{
Strides window_dilation_strides_adjusted;
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
for (size_t s : convolution->get_window_dilation_strides())
construct_primitive_build_string_conv<QuantizedConvolution>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
void
MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionRelu)
{
window_dilation_strides_adjusted.push_back(s - 1);
construct_primitive_build_string_conv<ConvolutionRelu>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolutionRelu)
{
construct_primitive_build_string_conv<QuantizedConvolutionRelu>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
return mkldnn_emitter.build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
template <>
void
MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionBias)
{
construct_primitive_build_string_conv<ConvolutionBias>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolutionBias)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolutionBias)
{
Strides window_dilation_strides_adjusted;
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
for (size_t s : convolution->get_window_dilation_strides())
construct_primitive_build_string_conv<QuantizedConvolutionBias>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
ConvolutionBiasAdd)
{
window_dilation_strides_adjusted.push_back(s - 1);
construct_primitive_build_string_conv<ConvolutionBiasAdd>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolutionBiasAdd)
{
construct_primitive_build_string_conv<QuantizedConvolutionBiasAdd>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(ConvolutionAdd)
{
construct_primitive_build_string_conv<ConvolutionAdd>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
mkldnn::post_ops ops;
if (convolution->with_relu())
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
QuantizedConvolutionBiasSignedAdd)
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
construct_primitive_build_string_conv<QuantizedConvolutionBiasSignedAdd>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
return mkldnn_emitter.build_convolution_forward(
input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
template <>
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
GroupConvolution)
{
construct_primitive_build_string_conv<GroupConvolution>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Convolution)
void MKLDNNPrimitiveBuildPass::CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(
GroupConvolutionBias)
{
return mkldnn_emitter.build_convolution<Convolution>(node);
construct_primitive_build_string_conv<GroupConvolutionBias>(
mkldnn_emitter, node, construct_string, deps, index, desc_file);
}
template <typename OpTy>
......@@ -370,44 +1135,6 @@ namespace ngraph
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBias)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
QuantizedConvolutionBiasSignedAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasSignedAdd>(
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBias)
{
return mkldnn_emitter.build_convolution<ConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
ConvolutionBiasBackpropFiltersBias)
......@@ -717,25 +1444,12 @@ using namespace ngraph::runtime::cpu::pass;
#define TI(x) std::type_index(typeid(x))
static const PrimitiveBuildOpMap prim_build_dispatcher{
{TI(Add), &MKLDNNPrimitiveBuildPass::build_primitive<Add>},
{TI(Concat), &MKLDNNPrimitiveBuildPass::build_primitive<Concat>},
{TI(Convert), &MKLDNNPrimitiveBuildPass::build_primitive<Convert>},
{TI(runtime::cpu::op::ConvertLayout),
&MKLDNNPrimitiveBuildPass::build_primitive<runtime::cpu::op::ConvertLayout>},
{TI(AvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPool>},
{TI(AvgPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPoolBackprop>},
{TI(BatchNormTraining), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTraining>},
{TI(BatchNormInference), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInference>},
{TI(BoundedRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BoundedRelu>},
{TI(BatchNormTrainingBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingBackprop>},
{TI(Convolution), &MKLDNNPrimitiveBuildPass::build_primitive<Convolution>},
{TI(GroupConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolution>},
{TI(ConvolutionRelu), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionRelu>},
{TI(ConvolutionBiasAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasAdd>},
{TI(BatchNormTrainingRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingRelu>},
{TI(BatchNormInferenceRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInferenceRelu>},
{TI(ConvolutionBackpropData),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBackpropData>},
{TI(ConvolutionBackpropFilters),
......@@ -745,44 +1459,108 @@ static const PrimitiveBuildOpMap prim_build_dispatcher{
{TI(MaxPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolBackprop>},
{TI(MaxPoolWithIndicesBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolWithIndicesBackprop>},
{TI(ConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBias>},
{TI(QuantizedConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolution>},
{TI(ConvolutionBiasBackpropFiltersBias),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasBackpropFiltersBias>},
{TI(LRN), &MKLDNNPrimitiveBuildPass::build_primitive<LRN>},
{TI(Relu), &MKLDNNPrimitiveBuildPass::build_primitive<Relu>},
{TI(ReluBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<ReluBackprop>},
{TI(LeakyRelu), &MKLDNNPrimitiveBuildPass::build_primitive<LeakyRelu>},
{TI(Sigmoid), &MKLDNNPrimitiveBuildPass::build_primitive<Sigmoid>},
{TI(SigmoidBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<SigmoidBackprop>},
{TI(Lstm), &MKLDNNPrimitiveBuildPass::build_primitive<Lstm>},
{TI(Rnn), &MKLDNNPrimitiveBuildPass::build_primitive<Rnn>},
{TI(QuantizedMaxPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedMaxPool>},
{TI(QuantizedAvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedAvgPool>},
{TI(Softmax), &MKLDNNPrimitiveBuildPass::build_primitive<Softmax>},
{TI(Slice), &MKLDNNPrimitiveBuildPass::build_primitive<Slice>},
{TI(ReplaceSlice), &MKLDNNPrimitiveBuildPass::build_primitive<ReplaceSlice>},
{TI(UpdateSlice), &MKLDNNPrimitiveBuildPass::build_primitive<UpdateSlice>},
{TI(ConvolutionAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionAdd>},
{TI(QuantizedConvolutionRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionRelu>},
{TI(QuantizedConvolutionBias),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBias>},
{TI(QuantizedConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasAdd>},
{TI(QuantizedConvolutionBiasSignedAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasSignedAdd>},
{TI(GroupConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolutionBias>},
{TI(Quantize), &MKLDNNPrimitiveBuildPass::build_primitive<Quantize>},
{TI(Dequantize), &MKLDNNPrimitiveBuildPass::build_primitive<Dequantize>},
{TI(QuantizedConcat), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConcat>},
{TI(GetOutputElement), &MKLDNNPrimitiveBuildPass::build_primitive<GetOutputElement>},
};
static const PrimitiveBuildStringConstructOpMap prim_build_string_construct_dispatcher{
{TI(Add), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Add>},
{TI(Concat), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Concat>},
{TI(BatchNormInference),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormInference>},
{TI(BatchNormTraining),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTraining>},
{TI(BatchNormInferenceRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormInferenceRelu>},
{TI(BatchNormTrainingRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTrainingRelu>},
{TI(BatchNormTrainingBackprop),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<BatchNormTrainingBackprop>},
{TI(LRN), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<LRN>},
{TI(Lstm), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Lstm>},
{TI(Rnn), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Rnn>},
{TI(Convolution), &MKLDNNPrimitiveBuildPass::construct_primitive_build_string<Convolution>},
{TI(ConvolutionRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionRelu>},
{TI(ConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionBias>},
{TI(ConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionBiasAdd>},
{TI(ConvolutionAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<ConvolutionAdd>},
{TI(GroupConvolution),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<GroupConvolution>},
{TI(GroupConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<GroupConvolutionBias>},
{TI(QuantizedConvolution),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolution>},
{TI(QuantizedConvolutionRelu),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionRelu>},
{TI(QuantizedConvolutionBias),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionBias>},
{TI(QuantizedConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<QuantizedConvolutionBiasAdd>},
{TI(QuantizedConvolutionBiasSignedAdd),
&MKLDNNPrimitiveBuildPass::construct_primitive_build_string<
QuantizedConvolutionBiasSignedAdd>},
};
// Check whether the node builds its mkldnn primitives at the first iteration.
// Needed during the transition period while the two dispatcher maps coexist.
static bool in_new_map(const std::shared_ptr<Node>& node)
{
if (std::dynamic_pointer_cast<ngraph::op::Add>(node) ||
std::dynamic_pointer_cast<ngraph::op::Concat>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormInference>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTraining>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormInferenceRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTrainingRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::BatchNormTrainingBackprop>(node) ||
std::dynamic_pointer_cast<ngraph::op::LRN>(node) ||
std::dynamic_pointer_cast<ngraph::op::Lstm>(node) ||
std::dynamic_pointer_cast<ngraph::op::Rnn>(node) ||
std::dynamic_pointer_cast<ngraph::op::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionBias>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionBiasAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionRelu>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBias>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBiasAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::QuantizedConvolutionBiasSignedAdd>(node) ||
std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(node))
{
return true;
}
return false;
}
bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
{
for (const auto& shp_node : nodes)
{
if (in_new_map(shp_node))
{
continue;
}
Node* node = shp_node.get();
if (mkldnn_utils::use_mkldnn_kernel(node))
......@@ -798,6 +1576,33 @@ bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr
}
}
std::ofstream desc_file(m_desc_filename, std::ios::out | std::ios::binary);
for (const auto& shp_node : nodes)
{
if (!in_new_map(shp_node))
{
continue;
}
Node* node = shp_node.get();
if (mkldnn_utils::use_mkldnn_kernel(node))
{
auto handler = prim_build_string_construct_dispatcher.find(TI(*node));
NGRAPH_CHECK(handler != prim_build_string_construct_dispatcher.end(),
"Unsupported node '",
node->description(),
"' in MKLDNNPrimitiveBuildPass");
std::string construct_string;
std::vector<size_t> deps;
size_t index;
handler->second(m_mkldnn_emitter, node, construct_string, deps, index, desc_file);
m_node_primitive_string_deps_index_map[node] =
std::tuple<std::string, std::vector<size_t>, size_t>(construct_string, deps, index);
}
}
return false;
}
......
......@@ -18,6 +18,7 @@
#include "ngraph/pass/pass.hpp"
#include <fstream>
#include <functional>
#include <typeindex>
#include <unordered_map>
......@@ -26,6 +27,15 @@
build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \
ngraph::Node * node)
#define CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(op_name) \
construct_primitive_build_string<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & \
mkldnn_emitter, \
ngraph::Node * node, \
std::string & construct_string, \
std::vector<size_t> & deps, \
size_t & index, \
std::ofstream & desc_file)
namespace mkldnn
{
class primitive;
......@@ -48,23 +58,46 @@ namespace ngraph
using PrimitiveBuildOpMap =
std::unordered_map<std::type_index, PrimitiveBuildFunction>;
using PrimitiveBuildStringConstructFunction =
std::function<void(ngraph::runtime::cpu::MKLDNNEmitter&,
ngraph::Node*,
std::string&,
std::vector<size_t>&,
size_t&,
std::ofstream&)>;
using PrimitiveBuildStringConstructOpMap =
std::unordered_map<std::type_index, PrimitiveBuildStringConstructFunction>;
/// This pass traverses the call graph and creates MKLDNN primitives for those ops
/// that have been assigned to MKLDNN. For ops that build their primitives at the
/// first iteration, it instead records the primitive-construction string, its
/// dependencies, and the primitive index.
class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass
{
private:
std::string m_desc_filename;
ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter;
/// External map that stores, for each node with an mkldnn implementation, its
/// associated mkldnn primitive index.
std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map;
/// External map that stores, for each node with an mkldnn implementation, its mkldnn
/// primitive-construction string, dependencies, and primitive index.
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
m_node_primitive_string_deps_index_map;
public:
MKLDNNPrimitiveBuildPass(
std::string filename,
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
std::unordered_map<const Node*, size_t>& node_primitive_idx_map)
: m_mkldnn_emitter(mkldnn_emitter)
std::unordered_map<const Node*, size_t>& node_primitive_idx_map,
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
node_primitive_string_deps_index_map)
: m_desc_filename(filename)
, m_mkldnn_emitter(mkldnn_emitter)
, m_node_primitive_idx_map(node_primitive_idx_map)
, m_node_primitive_string_deps_index_map(
node_primitive_string_deps_index_map)
{
}
......@@ -78,6 +111,19 @@ namespace ngraph
throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass");
}
template <typename OP>
static void construct_primitive_build_string(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node,
std::string& construct_string,
std::vector<size_t>& deps,
size_t& index,
std::ofstream& desc_file)
{
throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass");
}
};
}
}
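// Illustrative usage sketch (not from this commit): one way the CPU external function
// could register this pass with the usual ngraph::pass::Manager flow. The descriptor
// file name, the map variables, and the unqualified class name are assumptions made
// for the example only.
//
//     ngraph::pass::Manager pass_manager;
//     std::unordered_map<const Node*, size_t> node_primitive_idx_map;
//     std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>
//         node_primitive_string_deps_index_map;
//     pass_manager.register_pass<MKLDNNPrimitiveBuildPass>(
//         "mkldnn_primitive_descs.bin",
//         mkldnn_emitter,
//         node_primitive_idx_map,
//         node_primitive_string_deps_index_map);
//     pass_manager.run_passes(function);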
......
......@@ -26,8 +26,35 @@ struct CPURuntimeContextCG
std::unique_ptr<tbb::flow::graph> tbb_graph;
std::unique_ptr<tbb::global_control> tbb_gcontrol;
CPURuntimeContextCG() { init_tbb(); }
~CPURuntimeContextCG() { cleanup_tbb(); }
CPURuntimeContextCG() { init_tbb(); init_mkldnn_primitives(); }
~CPURuntimeContextCG() { cleanup_tbb(); cleanup_mkldnn_primitives(); cleanup_mkldnn_descriptors(); }
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<char*> mkldnn_workspaces;
std::vector<mkldnn::memory::desc*> mkldnn_descriptors;
mkldnn::engine global_cpu_engine = mkldnn::engine(mkldnn::engine::cpu, 0);
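// Rebinds the mkldnn memory primitive stored at primitive_index to the given buffer.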
void set_memory_ptr(size_t primitive_index,
void* ptr)
{
auto primitive = static_cast<mkldnn::memory*>(mkldnn_primitives[primitive_index]);
primitive->set_data_handle(ptr);
}
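// Submits the primitive at primitive_index to an eager mkldnn stream and waits for it to finish.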
void mkldnn_invoke_primitive(size_t primitive_index)
{
mkldnn::stream s(mkldnn::stream::kind::eager);
try
{
s.submit({*mkldnn_primitives[primitive_index]}).wait();
}
catch (const mkldnn::error& e)
{
throw std::runtime_error("Could not run mkldnn primitive " + e.message);
}
}
private:
inline void init_tbb()
......@@ -59,6 +86,35 @@ private:
}
}
}
void init_mkldnn_primitives();
inline void cleanup_mkldnn_primitives()
{
for (auto p : mkldnn_primitives)
{
delete p;
}
#ifndef _WIN32
// To avoid memory leaks in mkldnn, release any buffers that are not freed yet.
// https://software.intel.com/en-us/mkl-linux-developer-guide-avoiding-memory-leaks-in-intel-mkl
// mkl_free_buffers() is not exposed at this point, hence using mkl_serv_free_buffers().
ngraph::runtime::cpu::mkldnn_utils::mkl_serv_free_buffers();
#endif
for (auto w : mkldnn_workspaces)
{
free(w);
}
}
inline void cleanup_mkldnn_descriptors()
{
for (auto d : mkldnn_descriptors)
{
free(d);
}
}
};
extern "C" CPURuntimeContextCG* init_cg_ctx()
......@@ -70,4 +126,25 @@ extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx)
{
delete cg_ctx;
}
static void
deserialize_memory_descs_and_build_memory_primitives(std::ifstream& desc_file,
CPURuntimeContextCG* cg_ctx,
size_t descs_count)
{
cg_ctx->mkldnn_descriptors = std::vector<mkldnn::memory::desc*>(descs_count);
for (size_t i = 0; i < descs_count; i++)
{
size_t primitive_index;
desc_file >> primitive_index;
auto desc = (mkldnn::memory::desc*)malloc(sizeof(mkldnn::memory::desc));
if (!desc)
{
throw std::bad_alloc();
}
desc_file.read(reinterpret_cast<char*>(desc), sizeof(mkldnn::memory::desc));
cg_ctx->mkldnn_descriptors[i] = desc;
cg_ctx->mkldnn_primitives[primitive_index] = new mkldnn::memory({*cg_ctx->mkldnn_descriptors[i], cg_ctx->global_cpu_engine}, nullptr);
}
};
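// Illustrative sketch (not from this commit): the generated code is expected to size
// cg_ctx->mkldnn_primitives first (the deserializer assigns by index) and then call this
// helper to replay the descriptor file written by MKLDNNPrimitiveBuildPass at compile
// time. The file name and count variables below are assumptions for the example.
//
//     cg_ctx->mkldnn_primitives.resize(primitive_count, nullptr);
//     std::ifstream desc_file("mkldnn_primitive_descs.bin", std::ios::in | std::ios::binary);
//     deserialize_memory_descs_and_build_memory_primitives(desc_file, cg_ctx, descs_count);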
)"