Commit 14a2aeae authored by Diego Caballero, committed by Scott Cyphers

[Standalone] Decouple MKLDNN primitive build from code generation (#2701)

* [Standalone] Decouple MKLDNN primitive build from code generation

This patch introduces a new pass, MKLDNNPrimitiveBuildPass, which
iterates over all the ops assigned to MKLDNN and builds their
corresponding primitives. Primitive indices are stored in MKLDNNEmitter
and can easily be retrieved through the get_primitive_index(node)
interface. This decouples the creation of primitives from codegen and
fixes the problem of MKLDNN primitives being created twice (once in the
CommonFunctionCollection pass and once during codegen).

The current assertions only allow the creation of a single primitive
per node, but it should be simple to lift this restriction when needed.
Using a pass might not be the best approach here, but I found it
convenient for the current needs, and it should be straightforward to
convert it into a utility if needed.

These changes caused a conflict with the recently introduced
'build_quantized_inner_product*' methods. These new build methods will
be ported to the new build approach in a follow-up patch.
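
To make the new contract concrete: the pass builds each MKLDNN
primitive exactly once and records its index, and the emitters only
look that index up at codegen time. The following is a minimal,
self-contained C++ sketch of that lookup contract; the stub types below
model the node-to-index map and accessor added in this patch, not the
real nGraph classes.

#include <cassert>
#include <cstddef>
#include <unordered_map>

// Stand-in for ngraph::Node; only object identity matters for the map key.
struct Node
{
};

// Models the node -> primitive-index bookkeeping added to CPU_ExternalFunction.
class ExternalFunctionModel
{
public:
    // The MKLDNNPrimitiveBuildPass fills the map once, before codegen runs.
    void record_primitive(const Node* node, std::size_t primitive_index)
    {
        // Mirrors the "single primitive per node" assertion in the patch.
        assert(m_node_primitive_idx_map.count(node) == 0);
        m_node_primitive_idx_map[node] = primitive_index;
    }

    // Mirrors get_primitive_index(node): codegen only reads, it never builds.
    std::size_t get_primitive_index(const Node* node) const
    {
        auto it = m_node_primitive_idx_map.find(node);
        assert(it != m_node_primitive_idx_map.end() && "Primitive not found for node");
        return it->second;
    }

private:
    std::unordered_map<const Node*, std::size_t> m_node_primitive_idx_map;
};

int main()
{
    ExternalFunctionModel ef;
    Node add_node;
    ef.record_primitive(&add_node, 42);                     // done by the pass
    return ef.get_primitive_index(&add_node) == 42 ? 0 : 1; // done by the emitter
}

In the emitter changes below, the returned index is then passed to
mkldnn_emitter->get_primitive_deps(index) exactly as before; only the
construction side has moved into the pass.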

* Remove unrelated comment

* Remove TensorView code

* Set m_node_primitive_map from MKLDNNPrimitiveBuildPass

* Move node->primitive map from mkldnn pass to external function

* Fix struct/class inconsistency in forward declaration
parent 6a0101a2
......@@ -113,6 +113,7 @@ set(SRC
pass/cpu_mat_fusion.cpp
pass/cpu_memory_assignment.cpp
pass/cpu_memory_optimization.cpp
pass/cpu_mkldnn_primitive_build.cpp
pass/cpu_post_layout_optimizations.cpp
pass/cpu_rnn_fusion.cpp
pass/cpu_workspace_insertion.cpp
......
......@@ -166,21 +166,9 @@ namespace ngraph
writer.block_begin();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
std::vector<float> scale_vector(2, 1);
std::vector<mkldnn::memory::primitive_desc> inputs_pd;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, runtime::cpu::executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, runtime::cpu::executor::global_cpu_engine));
size_t add_index = 0;
add_index = mkldnn_emitter->build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
size_t add_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
......@@ -582,7 +570,7 @@ namespace ngraph
"Lstm op doesnt have the required number of inputs to emit MKLDNN kernel");
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_index = mkldnn_emitter->build_rnn<ngraph::op::Lstm>(node, args, out);
auto lstm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
......@@ -610,7 +598,7 @@ namespace ngraph
void CPU_Emitter::EMITTER_DECL(ngraph::op::Rnn)
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_index = mkldnn_emitter->build_rnn<ngraph::op::Rnn>(node, args, out);
auto rnn_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
......@@ -642,8 +630,6 @@ namespace ngraph
bool append_relu,
bool training)
{
const T* batchnorm = static_cast<const T*>(node);
writer.block_begin();
// define weights
writer << "std::vector<" << args[0].get_element_type().c_type_string()
......@@ -665,29 +651,12 @@ namespace ngraph
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
if (training && args.size() == 3)
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto weights_shape = Shape{2, args[0].get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mean_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
auto variance_desc = mkldnn_utils::get_output_mkldnn_md(node, 2);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
false,
training,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -704,27 +673,6 @@ namespace ngraph
}
else
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto weights_shape = Shape{2, args[0].get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
true,
training,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -832,9 +780,6 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::BatchNormTrainingBackprop)
{
const ngraph::op::BatchNormTrainingBackprop* batchnorm =
static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
writer.block_begin();
// define weights
writer << "std::vector<" << args[0].get_element_type().c_type_string()
......@@ -849,28 +794,9 @@ namespace ngraph
<< args[1].get_size() * args[1].get_element_type().size() << ");\n";
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
auto dinput_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto dweights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_backward(weights_desc,
input_desc,
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
auto batchnorm_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", bn_weights.data());\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
......@@ -1109,20 +1035,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_index = 0;
size_t concat_dim =
(static_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
size_t concat_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
size_t i;
for (i = 0; i < args.size(); i++)
{
......@@ -1288,16 +1203,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0);
auto lrn_index =
mkldnn_emitter->build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
auto lrn_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -1580,11 +1486,7 @@ namespace ngraph
auto lower_bounds = slice->get_lower_bounds();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto slice_index = mkldnn_emitter->build_slice(
input_desc, result_desc, lower_bounds, out_shape);
auto slice_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(slice_index);
writer.block_begin();
......@@ -2091,9 +1993,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionRelu>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2113,9 +2013,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2139,9 +2037,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolution>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2162,41 +2058,17 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolution)
{
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
// invoke group convolution
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -2216,8 +2088,6 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::GroupConvolutionBias)
{
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
......@@ -2225,44 +2095,8 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
size_t conv_index = external_function->get_primitive_index(node);
// invoke group convolution bias
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -2296,8 +2130,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::Convolution>(node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2344,10 +2177,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter
->build_convolution_backward<ngraph::op::ConvolutionBackpropFilters>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2394,10 +2224,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter
->build_convolution_backward<ngraph::op::ConvolutionBackpropData>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2439,9 +2266,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBias>(
node, args, out);
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2469,9 +2294,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out);
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
......@@ -2504,10 +2327,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index =
mkldnn_emitter
->build_convolution<ngraph::op::QuantizedConvolutionBiasSignedAdd>(
node, args, out);
auto qconv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
......@@ -2594,9 +2414,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBias>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2623,9 +2441,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBiasAdd>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "if (" << out[0].get_name() << " != " << args[3].get_name() << ")\n";
......@@ -2656,8 +2472,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution<ngraph::op::ConvolutionAdd>(
node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "if (" << out[0].get_name() << " != " << args[2].get_name() << ")\n";
......@@ -2686,8 +2501,7 @@ namespace ngraph
if (mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution_backward<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node, args, out);
auto conv_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2721,27 +2535,15 @@ namespace ngraph
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPool)
{
auto max_pool = static_cast<const ngraph::op::MaxPool*>(node);
auto arg_shape = args[0].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_pooling_forward(
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
size_t max_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -2772,7 +2574,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node);
size_t qmax_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -2787,13 +2589,14 @@ namespace ngraph
throw ngraph_error("unsupported parameters for QuantizedMaxPool");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedAvgPool)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node);
size_t qavg_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
......@@ -2811,24 +2614,12 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPoolWithIndices)
{
auto max_pool = static_cast<const ngraph::op::MaxPoolWithIndices*>(node);
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_forward(
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
size_t max_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -2948,21 +2739,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t avg_pool_index = mkldnn_emitter->build_pooling_forward(
(avg_pool->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
avg_pool->get_window_movement_strides(),
avg_pool->get_window_shape(),
avg_pool->get_padding_below(),
avg_pool->get_padding_above());
size_t avg_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -3050,21 +2829,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t avg_pool_index = mkldnn_emitter->build_pooling_backward(
(apb->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_dst_desc,
diff_src_desc,
apb->get_window_movement_strides(),
apb->get_window_shape(),
apb->get_padding_below(),
apb->get_padding_above());
size_t avg_pool_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -3104,21 +2871,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_backward(
mkldnn::algorithm::pooling_max,
fprop_src_desc,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
size_t max_pool_index = external_function->get_primitive_index(node);
auto& fdeps = mkldnn_emitter->get_primitive_deps(max_pool_index - 1);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(fdeps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(fdeps[1])
......@@ -3159,23 +2914,10 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPoolWithIndicesBackprop)
{
auto mpb = static_cast<const ngraph::op::MaxPoolWithIndicesBackprop*>(node);
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_backward(
mkldnn::algorithm::pooling_max,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
size_t max_pool_index = external_function->get_primitive_index(node);
auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(bdeps[0])
......@@ -3318,9 +3060,9 @@ namespace ngraph
mkldnn::memory::format::goihw);
}
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc);
size_t reorder_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
......@@ -3336,12 +3078,7 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t relu_index =
mkldnn_emitter->build_relu_backward(input_desc, delta_desc, result_desc);
size_t relu_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
......@@ -3371,12 +3108,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t relu_index = mkldnn_emitter->build_relu_forward(input_desc, result_desc);
size_t relu_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -3404,10 +3138,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto leaky_relu_index =
mkldnn_emitter->build_leaky_relu(input_desc, result_desc, alpha);
auto leaky_relu_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
......@@ -3437,10 +3168,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto bounded_relu_index =
mkldnn_emitter->build_bounded_relu(input_desc, result_desc, alpha);
auto bounded_relu_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
......@@ -3470,14 +3198,9 @@ namespace ngraph
auto result_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_forward(input_desc, result_desc);
size_t sigmoid_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
......@@ -3495,15 +3218,9 @@ namespace ngraph
auto result_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_backward(input_desc, delta_desc, result_desc);
size_t sigmoid_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
......@@ -3673,21 +3390,10 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto softmax = static_cast<const ngraph::op::Softmax*>(node);
if (softmax->get_axes().size() != 1)
{
throw ngraph_error("MKLDNN supports softmax only across single axis");
}
int softmax_axis = static_cast<int>(*(softmax->get_axes().begin()));
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t softmax_index = mkldnn_emitter->build_softmax_forward(
input_desc, result_desc, softmax_axis);
size_t softmax_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -4094,12 +3800,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t dequantize_index =
mkldnn_emitter->build_dequantization(node, input_data_desc, result_desc);
size_t dequantize_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -4128,24 +3831,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1));
if (scale_const_op == nullptr)
{
throw ngraph_error("Quantize scale must be a constant");
}
auto scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(1.0 / scale[0]);
size_t quantize_index = 0;
quantize_index = mkldnn_emitter->build_quantize_reorder(
input_data_desc, result_desc, scales);
size_t quantize_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
......@@ -4174,20 +3862,9 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_index = 0;
size_t concat_dim = (static_cast<const ngraph::op::QuantizedConcat*>(node))
->get_concatenation_axis();
concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
size_t concat_index = external_function->get_primitive_index(node);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
size_t i;
for (i = 0; i < args.size(); i++)
{
......
......@@ -175,6 +175,7 @@
#include "ngraph/runtime/cpu/pass/cpu_mat_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_memory_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp"
#include "ngraph/runtime/cpu/pass/cpu_mkldnn_primitive_build.hpp"
#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp"
#include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp"
......@@ -464,6 +465,11 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
ngraph::pass::Manager pass_manager;
register_common_passes(pass_manager, pass_config);
// Build mkldnn primitives for codegen.
pass_manager.register_pass<runtime::cpu::pass::MKLDNNPrimitiveBuildPass>(
*m_mkldnn_emitter, m_node_primitive_idx_map);
unordered_map<Node*, Node*> node_function_map;
string common_function_string;
auto femitter = bind(&ngraph::runtime::cpu::CPU_ExternalFunction::emit_op_as_function,
......@@ -1156,6 +1162,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(
pass_config.get_pass_attribute("ReuseMemory");
pass_manager.register_pass<runtime::cpu::pass::CPUMemoryAssignment>(
bufferID_to_tensorSets, tensor_to_bufferID, size_t(s_memory_pool_alignment), !reuse_memory);
pass_manager.get_state().set_visualize_tree_ops_map(runtime::cpu::get_visualize_tree_ops_map());
}
......
......@@ -114,6 +114,16 @@ namespace ngraph
return m_mkldnn_emitter;
}
/// Returns the index of the mkldnn primitive previously created for \p node.
size_t get_primitive_index(const Node* node) const
{
auto it = m_node_primitive_idx_map.find(node);
NGRAPH_ASSERT(it != m_node_primitive_idx_map.end())
<< "Primitive not found for node " << node->description();
return it->second;
}
size_t add_state(ngraph::State* state)
{
m_states.push_back(state);
......@@ -296,6 +306,9 @@ namespace ngraph
std::unordered_map<std::string, int> subgraph_param_sizes;
std::unordered_map<std::string, std::reference_wrapper<void*>> subgraph_param_ptrs;
#endif
/// Map each node with mkldnn implementation to its mkldnn primitive index.
std::unordered_map<const Node*, size_t> m_node_primitive_idx_map;
};
}
}
......
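The body of the new pass (pass/cpu_mkldnn_primitive_build.cpp) does not
appear in this excerpt. Based on the registration shown above (the pass
is constructed with the MKLDNNEmitter and the node-to-index map), a
hedged, self-contained model of the pass-side flow could look like the
sketch below; every name other than those visible in the diff is a
stand-in, not the real nGraph API.

#include <cstddef>
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <vector>

// Stand-ins for ngraph::Node and MKLDNNEmitter (not the real classes).
struct Node
{
    bool assigned_to_mkldnn = false;
};

struct EmitterModel
{
    std::size_t next_index = 0;
    // Models "build the MKLDNN primitive for this node and return its index".
    std::size_t build_primitive_for(const Node&) { return next_index++; }
};

// Models MKLDNNPrimitiveBuildPass: one walk over the graph, one primitive per node.
struct PrimitiveBuildPassModel
{
    EmitterModel& emitter;
    std::unordered_map<const Node*, std::size_t>& node_to_index;

    void run(const std::vector<std::shared_ptr<Node>>& ordered_ops)
    {
        for (const auto& op : ordered_ops)
        {
            if (!op->assigned_to_mkldnn)
            {
                continue; // non-MKLDNN ops keep their existing code path
            }
            if (node_to_index.count(op.get()) != 0)
            {
                throw std::runtime_error("primitive already built for node");
            }
            node_to_index[op.get()] = emitter.build_primitive_for(*op);
        }
    }
};

int main()
{
    EmitterModel emitter;
    std::unordered_map<const Node*, std::size_t> node_to_index;
    auto a = std::make_shared<Node>();
    a->assigned_to_mkldnn = true;
    PrimitiveBuildPassModel{emitter, node_to_index}.run({a});
    return node_to_index.at(a.get()) == 0 ? 0 : 1;
}

Codegen then only calls external_function->get_primitive_index(node),
as the emitter changes elsewhere in this diff show.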
......@@ -19,18 +19,47 @@
#include "mkldnn_emitter.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/update_slice.hpp"
#include "ngraph/type/element_type.hpp"
using namespace ngraph;
using namespace ngraph::op;
using namespace ngraph::runtime::cpu;
MKLDNNEmitter::~MKLDNNEmitter()
......@@ -150,6 +179,10 @@ size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_d
attr);
size_t primitive_index = insert_primitive(new mkldnn::reorder(
reorder_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -301,6 +334,9 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
conv_index = insert_primitive(conv_prim);
NGRAPH_ASSERT(m_primitive_deps.find(conv_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
}
catch (const mkldnn::error& e)
......@@ -349,6 +385,10 @@ size_t
*m_mkldnn_primitives[input_data_index],
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(conv_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
return conv_index;
}
......@@ -396,6 +436,10 @@ size_t
*m_mkldnn_primitives[weights_index],
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(conv_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
return conv_index;
}
......@@ -441,6 +485,9 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
*m_mkldnn_primitives[bias_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(conv_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
}
catch (const mkldnn::error& e)
......@@ -503,6 +550,9 @@ size_t MKLDNNEmitter::build_convolution_backward_weights_bias(
*m_mkldnn_primitives[out_weights_delta_index],
*m_mkldnn_primitives[out_bias_delta_index]));
NGRAPH_ASSERT(m_primitive_deps.find(conv_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[conv_index] = {
in_data_index, in_delta_index, out_weights_delta_index, out_bias_delta_index};
return conv_index;
......@@ -576,6 +626,9 @@ size_t
*m_mkldnn_primitives[delta_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, delta_index, result_index};
return primitive_index;
}
......@@ -693,6 +746,9 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -745,6 +801,9 @@ size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -821,6 +880,10 @@ size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algor
*m_mkldnn_primitives[ws_index],
*m_mkldnn_primitives[diff_src_index]));
NGRAPH_ASSERT(m_primitive_deps.find(fwd_primitive_index) == m_primitive_deps.end() &&
m_primitive_deps.find(bwd_primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[fwd_primitive_index] = {
fprop_src_index, diff_src_index, ws_index, ws_buf_index};
m_primitive_deps[bwd_primitive_index] = {
......@@ -901,6 +964,9 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm p
*m_mkldnn_primitives[dst_index],
*m_mkldnn_primitives[ws_index]));
NGRAPH_ASSERT(m_primitive_deps.find(fwd_primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[fwd_primitive_index] = {src_index, dst_index, ws_index};
return fwd_primitive_index;
}
......@@ -966,6 +1032,9 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_backward(
*m_mkldnn_primitives[fprop_ws_index],
*m_mkldnn_primitives[diff_src_index]));
NGRAPH_ASSERT(m_primitive_deps.find(bwd_primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[bwd_primitive_index] = {diff_dst_index, fprop_ws_index, diff_src_index};
return bwd_primitive_index;
}
......@@ -1003,6 +1072,10 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
{
primitive_index = insert_primitive(new mkldnn::reorder(*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
}
catch (const mkldnn::error& e)
......@@ -1048,6 +1121,9 @@ size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
size_t primitive_index = insert_primitive(new mkldnn::lrn_forward(
lrn_prim_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -1099,6 +1175,9 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
size_t primitive_index = insert_primitive(new mkldnn::eltwise_forward(
relu_pd, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -1153,6 +1232,9 @@ size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc
*m_mkldnn_primitives[delta_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, delta_index, result_index};
return primitive_index;
}
......@@ -1208,6 +1290,9 @@ size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_de
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -1268,6 +1353,9 @@ size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_d
*m_mkldnn_primitives[delta_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, delta_index, result_index};
return primitive_index;
}
......@@ -1327,6 +1415,9 @@ size_t MKLDNNEmitter::build_elementwise_add(
size_t add_index = insert_primitive(
new mkldnn::sum(sum_pd, inputs_primitive, *m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(add_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[add_index] = {input0_data_index, input1_data_index, result_index};
return add_index;
}
......@@ -1521,6 +1612,9 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh
*m_mkldnn_primitives[dinput_index],
*m_mkldnn_primitives[dweights_index]));
NGRAPH_ASSERT(m_primitive_deps.find(batchnorm_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[batchnorm_index] = {weights_index,
input_index,
mean_index,
......@@ -1636,6 +1730,10 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_layer_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_iter_index]),
static_cast<mkldnn::memory>(*m_mkldnn_primitives[workspace_index])));
NGRAPH_ASSERT(m_primitive_deps.find(rnn_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[rnn_index] = {src_layer_index,
src_iter_index,
weights_layer_index,
......@@ -1721,6 +1819,10 @@ size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inpu
in_out_index.push_back(inputs_data_index[i]);
}
in_out_index.push_back(result_index);
NGRAPH_ASSERT(m_primitive_deps.find(concat_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[concat_index] = in_out_index;
return concat_index;
}
......@@ -1797,8 +1899,12 @@ size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
size_t reorder_index = insert_primitive(new mkldnn::reorder(
reorder_pd, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(reorder_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
in_out_index.push_back(input_index);
in_out_index.push_back(result_index);
m_primitive_deps[reorder_index] = in_out_index;
return reorder_index;
}
......@@ -1845,6 +1951,9 @@ size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_de
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
......@@ -1897,13 +2006,16 @@ size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc,
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
NGRAPH_ASSERT(m_primitive_deps.find(primitive_index) == m_primitive_deps.end())
<< "Dependencies already created for node";
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_leaky_relu_desc(const ngraph::Node* node)
{
auto alpha = static_cast<const op::LeakyRelu*>(node)->get_alpha();
auto alpha = static_cast<const ngraph::op::LeakyRelu*>(node)->get_alpha();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
......@@ -1951,7 +2063,7 @@ size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc,
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_bounded_relu_desc(const ngraph::Node* node)
{
auto alpha = static_cast<const op::BoundedRelu*>(node)->get_alpha();
auto alpha = static_cast<const ngraph::op::BoundedRelu*>(node)->get_alpha();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
......
......@@ -70,6 +70,44 @@ namespace ngraph
class CPU_ExternalFunction;
class TensorViewWrapper;
// TODO (nbpatel) Templatize the return type when we have double scales
template <typename OP>
static std::vector<float> extract_scale_value(const ngraph::Node* node, int index)
{
auto qc = static_cast<const OP*>(node);
std::vector<float> scale_val = {1.0f};
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[index]);
if (scale_const_op != nullptr)
{
scale_val = scale_const_op->template get_vector<float>();
}
return scale_val;
}
template <typename OP,
typename std::enable_if<
(std::is_same<OP, ngraph::op::Convolution>::value ||
std::is_same<OP, ngraph::op::QuantizedConvolution>::value ||
std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
return false;
}
template <typename OP,
typename std::enable_if<
(!std::is_same<OP, ngraph::op::Convolution>::value &&
!std::is_same<OP, ngraph::op::QuantizedConvolution>::value &&
!std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
return static_cast<const OP*>(node)->with_relu();
}
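
The two has_relu overloads above use enable_if to branch at compile
time: ops that never carry a fused ReLU (Convolution,
QuantizedConvolution, GroupConvolution) resolve to a constant false,
while the fused variants are asked at runtime through their with_relu()
accessor. This is what lets the templated build_convolution append the
eltwise_relu post-op only when it applies. A tiny, self-contained
illustration of the same dispatch technique with stand-in op types (not
the real nGraph ops):

#include <iostream>
#include <type_traits>

// Stand-in op hierarchy (the real types live in ngraph::op).
struct Node
{
    virtual ~Node() = default;
};
struct Convolution : Node
{
};
struct ConvolutionRelu : Node
{
    bool with_relu() const { return true; }
};

// Ops that never carry a fused ReLU: resolved to false at compile time.
template <typename OP,
          typename std::enable_if<std::is_same<OP, Convolution>::value,
                                  std::nullptr_t>::type = nullptr>
bool has_relu(const Node*)
{
    return false;
}

// Fused variants: ask the node itself, as in the header above.
template <typename OP,
          typename std::enable_if<!std::is_same<OP, Convolution>::value,
                                  std::nullptr_t>::type = nullptr>
bool has_relu(const Node* node)
{
    return static_cast<const OP*>(node)->with_relu();
}

int main()
{
    Convolution conv;
    ConvolutionRelu conv_relu;
    std::cout << has_relu<Convolution>(&conv) << "\n";          // prints 0
    std::cout << has_relu<ConvolutionRelu>(&conv_relu) << "\n"; // prints 1
}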
class MKLDNNWorkspace
{
public:
......@@ -171,33 +209,15 @@ namespace ngraph
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
// TODO (nbpatel) Templatize the return type when we have double scales
template <typename OP>
std::vector<float> extract_scale_value(const ngraph::Node* node, int index)
template <typename OpTy>
size_t build_convolution(const ngraph::Node* node)
{
auto qc = static_cast<const OP*>(node);
std::vector<float> scale_val = {1.0f};
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[index]);
if (scale_const_op != nullptr)
{
scale_val = scale_const_op->template get_vector<float>();
}
return scale_val;
}
template <typename OP>
size_t build_convolution(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
auto convolution = static_cast<const OP*>(node);
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
// For dilation, MKLDNN wants to know how many elements to insert between, not
// how far apart to space the elements like nGraph. So we have to subtract 1
// from each pos.
Strides window_dilation_strides_adjusted;
auto* convolution = static_cast<const OpTy*>(node);
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
......@@ -219,22 +239,21 @@ namespace ngraph
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
mkldnn::post_ops ops;
if (std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionAdd>())
if (std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::ConvolutionAdd>())
{
ops.append_sum(1.f);
}
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node, 5);
ops.append_sum(sum_scale_val[0]);
}
if (has_relu<OP>(node))
if (has_relu<OpTy>(node))
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
......@@ -243,8 +262,8 @@ namespace ngraph
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
if (std::is_same<OP, ngraph::op::ConvolutionBias>() ||
std::is_same<OP, ngraph::op::ConvolutionBiasAdd>())
if (std::is_same<OpTy, ngraph::op::ConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::ConvolutionBiasAdd>())
{
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_convolution_forward(data_desc,
......@@ -257,10 +276,10 @@ namespace ngraph
convolution->get_padding_above(),
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolution>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionRelu>())
{
auto scale_val = extract_scale_value<OP>(node, 2);
auto scale_val = extract_scale_value<OpTy>(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
......@@ -272,13 +291,13 @@ namespace ngraph
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
else if (std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
int index =
std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ? 3 : 4;
auto scale_val = extract_scale_value<OP>(node, index);
std::is_same<OpTy, ngraph::op::QuantizedConvolutionBias>() ? 3 : 4;
auto scale_val = extract_scale_value<OpTy>(node, index);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution_forward(
data_desc,
......@@ -355,6 +374,31 @@ namespace ngraph
}
}
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
void build_quantized_convolution_forward(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& bias_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above,
const float scale,
const Node* node,
const mkldnn::post_ops& pops = mkldnn::post_ops());
mkldnn::memory::format query_convolution_forward_weight_format(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc_any,
......@@ -409,81 +453,6 @@ namespace ngraph
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
template <typename OP>
size_t build_convolution_backward(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
auto convolution = static_cast<const OP*>(node);
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto arg0_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto arg1_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto out0_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
if (std::is_same<OP, ngraph::op::ConvolutionBackpropData>())
{
// MKLDNN relies on named formats for kernel selection
if (arg0_desc.data.format == mkldnn_nchw)
{
arg0_desc.data.format = mkldnn_oihw;
}
if (arg0_desc.data.format == mkldnn_ncdhw)
{
arg0_desc.data.format = mkldnn_oidhw;
}
return build_convolution_backward_data(
arg0_desc,
arg1_desc,
out0_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
if (std::is_same<OP, ngraph::op::ConvolutionBackpropFilters>())
{
return build_convolution_backward_weights(
arg0_desc,
arg1_desc,
out0_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
if (std::is_same<OP, ngraph::op::ConvolutionBiasBackpropFiltersBias>())
{
auto out1_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
return build_convolution_backward_weights_bias(
arg0_desc,
arg1_desc,
out0_desc,
out1_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
throw ngraph_error(std::string("Unknown op ") + convolution->get_name());
}
size_t build_pooling_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& window_strides,
const ngraph::Shape& window_shape,
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
template <typename OP>
mkldnn::pooling_forward::desc get_avg_pooling_forward_desc(const ngraph::Node* node,
bool training)
......@@ -532,6 +501,14 @@ namespace ngraph
}
}
size_t build_pooling_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& window_strides,
const ngraph::Shape& window_shape,
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
template <typename OP>
mkldnn::pooling_forward::desc get_max_pooling_forward_desc(const ngraph::Node* node,
bool training)
......@@ -778,6 +755,56 @@ namespace ngraph
void build_elementwise_add(const mkldnn::sum::primitive_desc& sum_pd,
size_t add_index);
template <typename OpTy>
size_t build_batch_norm_primitive(const Node* node,
const bool append_relu,
const bool training)
{
const auto& args = node->get_inputs();
mkldnn::post_ops ops;
if (append_relu)
{
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
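// MKLDNN expects gamma and beta packed into a single 2 x C scale-shift weights
// tensor in nc layout; ngraph passes gamma and beta as inputs 0 and 1 and the
// data tensor as input 2, which is why the descriptors below are assembled this way.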
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
bool use_global_stats;
const mkldnn::memory::desc *mean_desc, *variance_desc;
if (training && args.size() == 3)
{
mean_desc = &mkldnn_utils::get_output_mkldnn_md(node, 1);
variance_desc = &mkldnn_utils::get_output_mkldnn_md(node, 2);
use_global_stats = false;
}
else
{
mean_desc = &mkldnn_utils::get_input_mkldnn_md(node, 3);
variance_desc = &mkldnn_utils::get_input_mkldnn_md(node, 4);
use_global_stats = true;
}
const OpTy* batchnorm = static_cast<const OpTy*>(node);
return build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
*mean_desc,
*variance_desc,
batchnorm->get_eps_value(),
use_global_stats,
training,
ops);
}
size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
......@@ -843,10 +870,10 @@ namespace ngraph
size_t batchnorm_index);
template <typename OP>
size_t build_rnn(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
size_t build_rnn(const ngraph::Node* node)
{
const auto& out = node->get_outputs();
const auto& args = node->get_inputs();
auto rnn_node = static_cast<const OP*>(node);
auto src_sequence_length_max =
static_cast<unsigned long>(rnn_node->get_src_sequence_length());
......@@ -1062,28 +1089,6 @@ namespace ngraph
return scale_val;
}
template <typename OP,
typename std::enable_if<
(std::is_same<OP, ngraph::op::Convolution>::value ||
std::is_same<OP, ngraph::op::QuantizedConvolution>::value ||
std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
return false;
}
template <typename OP,
typename std::enable_if<
(!std::is_same<OP, ngraph::op::Convolution>::value &&
!std::is_same<OP, ngraph::op::QuantizedConvolution>::value &&
!std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
return static_cast<const OP*>(node)->with_relu();
}
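// The two has_relu overloads above dispatch at compile time via SFINAE: plain
// Convolution, QuantizedConvolution and GroupConvolution carry no fused relu and
// always return false, while the remaining fused convolution ops report their
// with_relu() attribute.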
template <typename OP>
bool has_bias()
{
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "cpu_mkldnn_primitive_build.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/update_slice.hpp"
using namespace ngraph;
using namespace ngraph::op;
using namespace ngraph::runtime::cpu;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
// The following functions build the MKLDNN primitive for each type of nGraph Node.
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Add)
{
std::vector<float> scale_vector(2, 1);
std::vector<mkldnn::memory::primitive_desc> inputs_pd;
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, executor::global_cpu_engine));
return mkldnn_emitter.build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Lstm)
{
return mkldnn_emitter.build_rnn<Lstm>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Rnn)
{
return mkldnn_emitter.build_rnn<Rnn>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTraining)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormTraining>(
node, false /*Append relu*/, true /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInference)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInference>(
node, false /*Append relu*/, false /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormTrainingRelu>(
node, true /*Append relu*/, true /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormInferenceRelu)
{
return mkldnn_emitter.build_batch_norm_primitive<BatchNormInferenceRelu>(
node, true /*Append relu*/, false /*Training*/);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BatchNormTrainingBackprop)
{
const auto& args = node->get_inputs();
auto weights_shape =
Shape{2, args[0].get_tensor().get_tensor_layout()->get_size()};
auto weights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
auto dinput_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto dweights_desc = mkldnn_emitter.build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
const auto* batchnorm = static_cast<const BatchNormTrainingBackprop*>(node);
return mkldnn_emitter.build_batchnorm_backward(weights_desc,
input_desc,
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Concat)
{
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0, end = node->get_inputs().size(); i < end; i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_dim =
(static_cast<const Concat*>(node))->get_concatenation_axis();
return mkldnn_emitter.build_concat(inputs_data_desc, result_desc, concat_dim);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(LRN)
{
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
const auto* lrn = static_cast<const LRN*>(node);
return mkldnn_emitter.build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Slice)
{
const auto& out = node->get_outputs();
const Slice* slice = static_cast<const Slice*>(node);
auto out_shape = out[0].get_shape();
auto lower_bounds = slice->get_lower_bounds();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_slice(
input_desc, result_desc, lower_bounds, out_shape);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionRelu)
{
return mkldnn_emitter.build_convolution<ConvolutionRelu>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionRelu)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionRelu>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolution)
{
return mkldnn_emitter.build_convolution<QuantizedConvolution>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolution)
{
Strides window_dilation_strides_adjusted;
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
return mkldnn_emitter.build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(GroupConvolutionBias)
{
Strides window_dilation_strides_adjusted;
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
return mkldnn_emitter.build_convolution_forward(
input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Convolution)
{
return mkldnn_emitter.build_convolution<Convolution>(node);
}
template <typename OpTy>
size_t build_convolution_backward(MKLDNNEmitter& mkldnn_emitter,
const ngraph::Node* node)
{
auto convolution = static_cast<const OpTy*>(node);
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto arg0_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto arg1_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto out0_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
if (std::is_same<OpTy, ngraph::op::ConvolutionBackpropData>())
{
// MKLDNN relies on named formats for kernel selection
if (arg0_desc.data.format == mkldnn_nchw)
{
arg0_desc.data.format = mkldnn_oihw;
}
if (arg0_desc.data.format == mkldnn_ncdhw)
{
arg0_desc.data.format = mkldnn_oidhw;
}
return mkldnn_emitter.build_convolution_backward_data(
arg0_desc,
arg1_desc,
out0_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
if (std::is_same<OpTy, ngraph::op::ConvolutionBackpropFilters>())
{
return mkldnn_emitter.build_convolution_backward_weights(
arg0_desc,
arg1_desc,
out0_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
if (std::is_same<OpTy, ngraph::op::ConvolutionBiasBackpropFiltersBias>())
{
auto out1_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
return mkldnn_emitter.build_convolution_backward_weights_bias(
arg0_desc,
arg1_desc,
out0_desc,
out1_desc,
convolution->get_window_movement_strides_forward(),
window_dilation_strides_adjusted,
convolution->get_padding_below_forward(),
convolution->get_padding_above_forward());
}
throw ngraph_error(std::string("Unknown op ") + convolution->get_name());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBackpropFilters)
{
return build_convolution_backward<ConvolutionBackpropFilters>(mkldnn_emitter,
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBackpropData)
{
return build_convolution_backward<ConvolutionBackpropData>(mkldnn_emitter,
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBias)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
QuantizedConvolutionBiasSignedAdd)
{
return mkldnn_emitter.build_convolution<QuantizedConvolutionBiasSignedAdd>(
node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBias)
{
return mkldnn_emitter.build_convolution<ConvolutionBias>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionBiasAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionBiasAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ConvolutionAdd)
{
return mkldnn_emitter.build_convolution<ConvolutionAdd>(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
ConvolutionBiasBackpropFiltersBias)
{
return build_convolution_backward<ConvolutionBiasBackpropFiltersBias>(
mkldnn_emitter, node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(MaxPool)
{
auto max_pool = static_cast<const ngraph::op::MaxPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_pooling_forward(
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedMaxPool)
{
return mkldnn_emitter.build_quantized_max_pool(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedAvgPool)
{
return mkldnn_emitter.build_quantized_avg_pool(node);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(MaxPoolWithIndices)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto max_pool = static_cast<const ngraph::op::MaxPoolWithIndices*>(node);
return mkldnn_emitter.build_max_pooling_with_indices_forward(
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(AvgPool)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto avg_pool = static_cast<const ngraph::op::AvgPool*>(node);
return mkldnn_emitter.build_pooling_forward(
(avg_pool->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
avg_pool->get_window_movement_strides(),
avg_pool->get_window_shape(),
avg_pool->get_padding_below(),
avg_pool->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(AvgPoolBackprop)
{
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto apb = static_cast<const ngraph::op::AvgPoolBackprop*>(node);
return mkldnn_emitter.build_pooling_backward(
(apb->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_dst_desc,
diff_src_desc,
apb->get_window_movement_strides(),
apb->get_window_shape(),
apb->get_padding_below(),
apb->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(MaxPoolBackprop)
{
auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mpb = static_cast<const ngraph::op::MaxPoolBackprop*>(node);
return mkldnn_emitter.build_max_pooling_backward(
mkldnn::algorithm::pooling_max,
fprop_src_desc,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(MaxPoolWithIndicesBackprop)
{
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mpb = static_cast<const ngraph::op::MaxPoolWithIndicesBackprop*>(node);
return mkldnn_emitter.build_max_pooling_with_indices_backward(
mkldnn::algorithm::pooling_max,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(
ngraph::runtime::cpu::op::ConvertLayout)
{
const auto& args = node->get_inputs();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
// Special case: convert nchw weights (really oihw for convolution) to
// goihw/Goihw16g/Goihw8g for GroupConvolution's weights.
if (input_desc.data.format == mkldnn_nchw &&
result_desc.data.format == mkldnn_goihw)
{
input_desc = result_desc;
}
else if (input_desc.data.format == mkldnn_nchw &&
input_desc.data.ndims == 4 /*nchw*/ &&
result_desc.data.ndims == 5 /*Goihw16g/Goihw8g/etc*/ &&
node->get_users().size() == 1)
{
Shape weights_shape_groups;
if (auto gconv = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(
node->get_users()[0]))
{
weights_shape_groups = gconv->get_weights_dimensions();
}
else if (auto gconvb =
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(
node->get_users()[0]))
{
weights_shape_groups = gconvb->get_weights_dimensions();
}
else
{
throw ngraph_error(
"Incompatible input/output shape in ConvertLayout op");
}
input_desc = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format::goihw);
}
return mkldnn_emitter.build_reorder(input_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(ReluBackprop)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_relu_backward(input_desc, delta_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Relu)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_relu_forward(input_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(LeakyRelu)
{
auto leaky_relu_node = static_cast<const ngraph::op::LeakyRelu*>(node);
float alpha = leaky_relu_node->get_alpha();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_leaky_relu(input_desc, result_desc, alpha);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(BoundedRelu)
{
auto bounded_relu_node = static_cast<const ngraph::op::BoundedRelu*>(node);
float alpha = bounded_relu_node->get_alpha();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_bounded_relu(input_desc, result_desc, alpha);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Sigmoid)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_sigmoid_forward(input_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(SigmoidBackprop)
{
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_sigmoid_backward(
input_desc, delta_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Softmax)
{
auto softmax = static_cast<const ngraph::op::Softmax*>(node);
if (softmax->get_axes().size() != 1)
{
throw ngraph_error("MKLDNN supports softmax only across single axis");
}
int softmax_axis = static_cast<int>(*(softmax->get_axes().begin()));
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_softmax_forward(
input_desc, result_desc, softmax_axis);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Dequantize)
{
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn_emitter.build_dequantization(node, input_data_desc, result_desc);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Quantize)
{
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto quantize = static_cast<const ngraph::op::Quantize*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<Constant>(quantize->get_argument(1));
if (scale_const_op == nullptr)
{
throw ngraph_error("Quantize scale must be a constant");
}
auto scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(1.0 / scale[0]);
return mkldnn_emitter.build_quantize_reorder(
input_data_desc, result_desc, scales);
}
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(QuantizedConcat)
{
size_t args_size = node->get_inputs().size();
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args_size; i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_dim =
(static_cast<const QuantizedConcat*>(node))->get_concatenation_axis();
return mkldnn_emitter.build_concat(inputs_data_desc, result_desc, concat_dim);
}
}
}
}
}
using namespace ngraph::runtime::cpu::pass;
#define TI(x) std::type_index(typeid(x))
static const PrimitiveBuildOpMap prim_build_dispatcher{
{TI(Add), &MKLDNNPrimitiveBuildPass::build_primitive<Add>},
{TI(Concat), &MKLDNNPrimitiveBuildPass::build_primitive<Concat>},
{TI(Convert), &MKLDNNPrimitiveBuildPass::build_primitive<Convert>},
{TI(runtime::cpu::op::ConvertLayout),
&MKLDNNPrimitiveBuildPass::build_primitive<runtime::cpu::op::ConvertLayout>},
{TI(AvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPool>},
{TI(AvgPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<AvgPoolBackprop>},
{TI(BatchNormTraining), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTraining>},
{TI(BatchNormInference), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInference>},
{TI(BoundedRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BoundedRelu>},
{TI(BatchNormTrainingBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingBackprop>},
{TI(Convolution), &MKLDNNPrimitiveBuildPass::build_primitive<Convolution>},
{TI(GroupConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolution>},
{TI(ConvolutionRelu), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionRelu>},
{TI(ConvolutionBiasAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasAdd>},
{TI(BatchNormTrainingRelu), &MKLDNNPrimitiveBuildPass::build_primitive<BatchNormTrainingRelu>},
{TI(BatchNormInferenceRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<BatchNormInferenceRelu>},
{TI(ConvolutionBackpropData),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBackpropData>},
{TI(ConvolutionBackpropFilters),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBackpropFilters>},
{TI(MaxPool), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPool>},
{TI(MaxPoolWithIndices), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolWithIndices>},
{TI(MaxPoolBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolBackprop>},
{TI(MaxPoolWithIndicesBackprop),
&MKLDNNPrimitiveBuildPass::build_primitive<MaxPoolWithIndicesBackprop>},
{TI(ConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBias>},
{TI(QuantizedConvolution), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolution>},
{TI(ConvolutionBiasBackpropFiltersBias),
&MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionBiasBackpropFiltersBias>},
{TI(LRN), &MKLDNNPrimitiveBuildPass::build_primitive<LRN>},
{TI(Relu), &MKLDNNPrimitiveBuildPass::build_primitive<Relu>},
{TI(ReluBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<ReluBackprop>},
{TI(LeakyRelu), &MKLDNNPrimitiveBuildPass::build_primitive<LeakyRelu>},
{TI(Sigmoid), &MKLDNNPrimitiveBuildPass::build_primitive<Sigmoid>},
{TI(SigmoidBackprop), &MKLDNNPrimitiveBuildPass::build_primitive<SigmoidBackprop>},
{TI(Lstm), &MKLDNNPrimitiveBuildPass::build_primitive<Lstm>},
{TI(Rnn), &MKLDNNPrimitiveBuildPass::build_primitive<Rnn>},
{TI(QuantizedMaxPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedMaxPool>},
{TI(QuantizedAvgPool), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedAvgPool>},
{TI(Softmax), &MKLDNNPrimitiveBuildPass::build_primitive<Softmax>},
{TI(Slice), &MKLDNNPrimitiveBuildPass::build_primitive<Slice>},
{TI(ReplaceSlice), &MKLDNNPrimitiveBuildPass::build_primitive<ReplaceSlice>},
{TI(UpdateSlice), &MKLDNNPrimitiveBuildPass::build_primitive<UpdateSlice>},
{TI(ConvolutionAdd), &MKLDNNPrimitiveBuildPass::build_primitive<ConvolutionAdd>},
{TI(QuantizedConvolutionRelu),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionRelu>},
{TI(QuantizedConvolutionBias),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBias>},
{TI(QuantizedConvolutionBiasAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasAdd>},
{TI(QuantizedConvolutionBiasSignedAdd),
&MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConvolutionBiasSignedAdd>},
{TI(GroupConvolutionBias), &MKLDNNPrimitiveBuildPass::build_primitive<GroupConvolutionBias>},
{TI(Quantize), &MKLDNNPrimitiveBuildPass::build_primitive<Quantize>},
{TI(Dequantize), &MKLDNNPrimitiveBuildPass::build_primitive<Dequantize>},
{TI(QuantizedConcat), &MKLDNNPrimitiveBuildPass::build_primitive<QuantizedConcat>},
{TI(GetOutputElement), &MKLDNNPrimitiveBuildPass::build_primitive<GetOutputElement>},
};
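// Each entry keys the concrete node type (std::type_index via TI) to the matching
// build_primitive<Op> specialization. An op routed here without an explicit
// specialization falls back to the generic build_primitive template declared in the
// header, which throws; MKLDNN-assigned ops missing from the map entirely trip the
// assertion in run_on_call_graph below.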
bool MKLDNNPrimitiveBuildPass::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
{
for (const auto& shp_node : nodes)
{
Node* node = shp_node.get();
if (mkldnn_utils::use_mkldnn_kernel(node))
{
auto handler = prim_build_dispatcher.find(TI(*node));
NGRAPH_ASSERT(handler != prim_build_dispatcher.end())
<< "Unsupported node '" << node->description() << "' in MKLDNNPrimitiveBuildPass";
size_t primitive_idx = handler->second(m_mkldnn_emitter, node);
m_node_primitive_idx_map[node] = primitive_idx;
}
}
return false;
}
#undef TI
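The following is a minimal sketch, not part of this patch, of how the pass could be registered ahead of codegen. The helper name, the include paths, and the assumption that the external function owns the emitter and the node-to-primitive-index map are illustrative only.
#include <unordered_map>
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/pass/cpu_mkldnn_primitive_build.hpp"
// Hypothetical helper: builds all MKLDNN primitives up front so that emitters can
// later resolve them through the node -> primitive-index map instead of rebuilding.
static void register_mkldnn_primitive_build_pass(
    ngraph::pass::Manager& pass_manager,
    ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
    std::unordered_map<const ngraph::Node*, size_t>& node_primitive_idx_map)
{
    pass_manager.register_pass<ngraph::runtime::cpu::pass::MKLDNNPrimitiveBuildPass>(
        mkldnn_emitter, node_primitive_idx_map);
}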
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
#include <functional>
#include <typeindex>
#include <unordered_map>
#define BUILD_PRIMITIVE_DECL(op_name) \
build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \
ngraph::Node * node)
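// Example expansion: with the macro above, a specialization header such as
//   size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Add)
// becomes
//   size_t MKLDNNPrimitiveBuildPass::build_primitive<Add>(
//       ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter, ngraph::Node* node)
// so every specialization receives `mkldnn_emitter` and `node` as its parameters.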
namespace mkldnn
{
class primitive;
}
namespace ngraph
{
class Node;
namespace runtime
{
namespace cpu
{
class MKLDNNEmitter;
namespace pass
{
using PrimitiveBuildFunction =
std::function<size_t(ngraph::runtime::cpu::MKLDNNEmitter&, ngraph::Node*)>;
using PrimitiveBuildOpMap =
std::unordered_map<std::type_index, PrimitiveBuildFunction>;
/// This pass traverses the call graph and creates MKLDNN primitives for those ops
/// that have been assigned to MKLDNN.
class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass
{
private:
ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter;
/// External map that stores, for each node with an MKLDNN implementation, the
/// index of its associated MKLDNN primitive.
std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map;
public:
MKLDNNPrimitiveBuildPass(
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
std::unordered_map<const Node*, size_t>& node_primitive_idx_map)
: m_mkldnn_emitter(mkldnn_emitter)
, m_node_primitive_idx_map(node_primitive_idx_map)
{
}
bool run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;
template <typename OP>
static size_t
build_primitive(ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
ngraph::Node* node)
{
throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass");
}
};
}
}
}
}
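As an illustration of the dispatch pattern above, and not part of this patch, adding MKLDNN support for a new op takes one build_primitive specialization plus one dispatcher entry. The op Foo and the emitter method build_foo below are hypothetical names used only for this sketch.
template <>
size_t MKLDNNPrimitiveBuildPass::BUILD_PRIMITIVE_DECL(Foo)
{
    // Query the layouts chosen for the op's tensors and hand them to the emitter,
    // which creates the primitive and returns its index.
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
    return mkldnn_emitter.build_foo(input_desc, result_desc);
}
// ...and in prim_build_dispatcher:
//     {TI(Foo), &MKLDNNPrimitiveBuildPass::build_primitive<Foo>},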