Commit eda11da7 authored by Amy Zhuang's avatar Amy Zhuang Committed by Adam Procter

Refactor to create MKLDNN primitives on the first iteration: (#2363)

* Refactor to create MKLDNN primitives on the first iteration:
  add, avg_pool, batch_norm, bounded_relu, concat, convert_layout,
  leaky_relu, lrn, max_pool, quantized_avg_pool, quantized_max_pool,
  relu, sigmoid, slice, softmax.

* Refactor to create MKLDNN primitives on the first iteration:
  pooling backward, convolution.

* Refactor to create MKLDNN primitives on the first iteration:
  convolution backward, rnn, lstm, quantization, dequantization.

* Delete one duplicate declaration.

* Create and pass mkldnn descriptors/primitive-descriptors for ops.

* Create and pass mkldnn descriptors for convolution backward ops.

* Remove one unused variable.

* Remove unused variables.

* Remove unused variables.

* Address PR feedback.

* Fix a bug.

* Add one parameter to build_quantize_reorder.

* Address PR feedback.

* Fix bi-rnn issue.
parent c571b7a7
...@@ -38,28 +38,22 @@ namespace ngraph ...@@ -38,28 +38,22 @@ namespace ngraph
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
vector<float> scale_vector(2, 1);
vector<mkldnn::memory::primitive_desc> inputs_pd;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto sum_pd = mkldnn_emitter->get_elementwise_add_desc(node);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); // Add needs 4 primitives: input0, input1, result, and sum.
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); size_t add_index = mkldnn_emitter->reserve_primitive_space(4);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, runtime::cpu::executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, runtime::cpu::executor::global_cpu_engine));
size_t add_index = mkldnn_emitter->build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index); auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto functor = [&, add_index](CPURuntimeContext* ctx, auto functor = [&, sum_pd, add_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_elementwise_add(sum_pd, add_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -52,24 +52,19 @@ namespace ngraph ...@@ -52,24 +52,19 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto avg_pool_desc =
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPool>(node,
false);
size_t avg_pool_index = mkldnn_emitter->build_pooling_forward( // AvgPool needs 3 primitives: input, result, and pooling_forward.
(include_padding_in_avg_computation size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
window_movement_strides,
window_shape,
padding_below,
padding_above);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx, auto functor = [&, avg_pool_desc, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(avg_pool_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
...@@ -130,23 +125,23 @@ namespace ngraph ...@@ -130,23 +125,23 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0); auto avg_pool_fwd_desc =
auto diff_src_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPoolBackprop>(
node, true);
size_t avg_pool_index = mkldnn_emitter->build_pooling_backward( auto avg_pool_desc =
(apb->get_include_padding_in_avg_computation() mkldnn_emitter->get_avg_pooling_backward_desc<ngraph::op::AvgPoolBackprop>(
? mkldnn::algorithm::pooling_avg_include_padding node);
: mkldnn::algorithm::pooling_avg_exclude_padding), // AvgPoolBackprop needs 3 primitives: input, result, and pooling_backward.
diff_dst_desc, size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
diff_src_desc,
apb->get_window_movement_strides(),
apb->get_window_shape(),
apb->get_padding_below(),
apb->get_padding_above());
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, avg_pool_desc, avg_pool_fwd_desc, avg_pool_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_backward(
avg_pool_desc, avg_pool_fwd_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
......
...@@ -48,8 +48,6 @@ namespace ngraph ...@@ -48,8 +48,6 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name()); auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
const OP* batchnorm = static_cast<const OP*>(node);
// Kill clang diagnostics bug // Kill clang diagnostics bug
#pragma clang diagnostic push #pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-braces" #pragma clang diagnostic ignored "-Wmissing-braces"
...@@ -80,28 +78,32 @@ namespace ngraph ...@@ -80,28 +78,32 @@ namespace ngraph
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name()); auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, true);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mean_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
auto variance_desc = mkldnn_utils::get_output_mkldnn_md(node, 2);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
false,
training,
ops);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { auto functor = [&,
batchnorm_desc,
weights_desc,
training,
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy( memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
...@@ -122,29 +124,32 @@ namespace ngraph ...@@ -122,29 +124,32 @@ namespace ngraph
auto& arg4_tensor = external_function->get_tensor_data(args[4].get_name()); auto& arg4_tensor = external_function->get_tensor_data(args[4].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, false);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
true,
training,
ops);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes]( auto functor = [&,
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { batchnorm_desc,
weights_desc,
training,
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy( memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
...@@ -295,9 +300,6 @@ namespace ngraph ...@@ -295,9 +300,6 @@ namespace ngraph
template <> template <>
void Builder::BUILDER_DECL(ngraph::op::BatchNormTrainingBackprop) void Builder::BUILDER_DECL(ngraph::op::BatchNormTrainingBackprop)
{ {
const ngraph::op::BatchNormTrainingBackprop* batchnorm =
static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
...@@ -326,34 +328,31 @@ namespace ngraph ...@@ -326,34 +328,31 @@ namespace ngraph
std::default_delete<uint8_t[]>()); std::default_delete<uint8_t[]>());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc = mkldnn_emitter->get_batchnorm_backward_desc(node);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
auto dinput_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto dweights_desc = mkldnn_emitter->build_memory_descriptor( auto dweights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto batchnorm_index = // batchnorm backward needs 8 primitives: weights, input, mean, variance,
mkldnn_emitter->build_batchnorm_backward(weights_desc, // dinput, dweights, and batch_normalization_backward.
input_desc, auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(8);
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, auto functor = [&,
batchnorm_desc,
weights_desc,
dweights_desc,
batchnorm_index, batchnorm_index,
stacked_weights, stacked_weights,
stacked_dweights, stacked_dweights,
weight_sizes](CPURuntimeContext* ctx, CPUExecutionContext* ectx) { weight_sizes](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_backward(
batchnorm_desc, weights_desc, dweights_desc, batchnorm_index);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy(stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); memcpy(stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
......
...@@ -43,13 +43,18 @@ namespace ngraph ...@@ -43,13 +43,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bounded_relu_desc = mkldnn_emitter->get_bounded_relu_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // BoundedRelu needs 3 primitives: input, result, and eltwise_forward.
auto bounded_relu_index = auto bounded_relu_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_bounded_relu(input_desc, result_desc, alpha);
auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index);
auto functor = [&, bounded_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bounded_relu_desc, bounded_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_bounded_relu(bounded_relu_desc,
bounded_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index);
......
...@@ -92,29 +92,31 @@ namespace ngraph ...@@ -92,29 +92,31 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto concat_pd = mkldnn_emitter->get_concat_desc(node, nargs);
std::vector<mkldnn::memory::desc> inputs_data_desc; std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++) for (size_t i = 0; i < nargs; i++)
{ {
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i)); inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
} }
// Concat needs number of inputs plus 2 primitives; those two are for result and concat.
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto concat_index = mkldnn_emitter->reserve_primitive_space(nargs + 2);
size_t concat_dim =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
auto concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index); auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
auto functor = [&, arg_tensors, nargs, concat_index]( auto functor =
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { [&, concat_pd, inputs_data_desc, arg_tensors, nargs, concat_index](
for (size_t i = 0; i < nargs; i++) CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
{ if (ctx->first_iteration)
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[i], arg_tensors[i]); {
} mkldnn_emitter->build_concat(
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[nargs], out_tensor); concat_pd, inputs_data_desc, concat_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, concat_index); }
}; for (size_t i = 0; i < nargs; i++)
{
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[i], arg_tensors[i]);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[nargs], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, concat_index);
};
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
...@@ -81,11 +81,15 @@ namespace ngraph ...@@ -81,11 +81,15 @@ namespace ngraph
mkldnn::memory::format::goihw); mkldnn::memory::format::goihw);
} }
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc); // ConvertLayout needs 3 primitives: input, result, and reorder.
size_t reorder_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index); auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
auto functor = [&, reorder_index](CPURuntimeContext* ctx, auto functor = [&, input_desc, result_desc, reorder_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_reorder(input_desc, result_desc, reorder_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index);
......
...@@ -52,12 +52,20 @@ namespace ngraph ...@@ -52,12 +52,20 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto conv_desc =
mkldnn_emitter->build_convolution<ngraph::op::Convolution>(node, args, out); mkldnn_emitter->get_convolution_forward_desc<ngraph::op::Convolution>(node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::Convolution>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
...@@ -124,13 +132,22 @@ namespace ngraph ...@@ -124,13 +132,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto conv_desc =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionRelu>( mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionRelu>(
node, args, out); node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionRelu>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
...@@ -157,13 +174,22 @@ namespace ngraph ...@@ -157,13 +174,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto conv_desc =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBias>( mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionBias>(
node, args, out); node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionBias>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
...@@ -193,13 +219,22 @@ namespace ngraph ...@@ -193,13 +219,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto conv_desc =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBiasAdd>( mkldnn_emitter
node, args, out); ->get_convolution_forward_desc<ngraph::op::ConvolutionBiasAdd>(node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::ConvolutionBiasAdd>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index, arg3_size](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index, arg3_size](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
if (out_tensor != arg3_tensor) if (out_tensor != arg3_tensor)
{ {
memcpy(static_cast<char*>(out_tensor), memcpy(static_cast<char*>(out_tensor),
...@@ -234,12 +269,22 @@ namespace ngraph ...@@ -234,12 +269,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution<ngraph::op::ConvolutionAdd>( auto conv_desc =
node, args, out); mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionAdd>(
node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionAdd>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(false);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index, arg2_size](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index, arg2_size](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
if (out_tensor != arg2_tensor) if (out_tensor != arg2_tensor)
{ {
memcpy(static_cast<char*>(out_tensor), memcpy(static_cast<char*>(out_tensor),
...@@ -277,14 +322,22 @@ namespace ngraph ...@@ -277,14 +322,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto bwd_desc = mkldnn_emitter->get_convolution_backward_data_desc<
mkldnn_emitter ngraph::op::ConvolutionBackpropData>(node);
->build_convolution_backward<ngraph::op::ConvolutionBackpropData>( auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
node, args, out); ngraph::op::ConvolutionBackpropData>(node);
// ConvolutionBackpropData needs 4 primitives: weights, delta, result,
// and convolution_backward.
auto conv_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_data(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
...@@ -359,14 +412,22 @@ namespace ngraph ...@@ -359,14 +412,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = auto bwd_desc = mkldnn_emitter->get_convolution_backward_weights_desc<
mkldnn_emitter ngraph::op::ConvolutionBackpropFilters>(node);
->build_convolution_backward<ngraph::op::ConvolutionBackpropFilters>( auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
node, args, out); ngraph::op::ConvolutionBackpropFilters>(node);
// ConvolutionBackpropFilter needs 4 primitives: input, delta, weights_delta,
// and convolution_backward_weights.
auto conv_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_weights(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
...@@ -436,12 +497,22 @@ namespace ngraph ...@@ -436,12 +497,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution_backward< auto bwd_desc = mkldnn_emitter->get_convolution_backward_weights_desc<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node, args, out); ngraph::op::ConvolutionBiasBackpropFiltersBias>(node);
auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node);
// ConvolutionBiasBackpropFilter needs 5 primitives: input, delta, weights_delta,
// bias_delta, and convolution_backward_weights.
auto conv_index = mkldnn_emitter->reserve_primitive_space(5);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_weights_bias(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
...@@ -466,43 +537,25 @@ namespace ngraph ...@@ -466,43 +537,25 @@ namespace ngraph
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc =
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); mkldnn_emitter->get_convolution_forward_desc<ngraph::op::GroupConvolution>(
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::GroupConvolution>(
auto padding_below = convolution->get_padding_below(); node);
auto padding_above = convolution->get_padding_above(); size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto filter_strides = convolution->get_window_movement_strides();
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
// group convolution // group convolution
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -528,58 +581,25 @@ namespace ngraph ...@@ -528,58 +581,25 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name()); auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc =
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); mkldnn_emitter
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); ->get_convolution_forward_desc<ngraph::op::GroupConvolutionBias>(node);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto conv_attr =
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::GroupConvolutionBias>(node);
auto padding_below = convolution->get_padding_below(); size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
}
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx, auto functor = [&, conv_desc, conv_attr, conv_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
......
...@@ -43,13 +43,17 @@ namespace ngraph ...@@ -43,13 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto leaky_relu_desc = mkldnn_emitter->get_leaky_relu_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // LeakyRelu needs 3 primitives: input, result, and eltwise_forward.
auto leaky_relu_index = auto leaky_relu_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_leaky_relu(input_desc, result_desc, alpha);
auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index);
auto functor = [&, leaky_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, leaky_relu_desc, leaky_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_leaky_relu(leaky_relu_desc, leaky_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index);
......
...@@ -43,19 +43,17 @@ namespace ngraph ...@@ -43,19 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto lrn_desc = mkldnn_emitter->get_lrn_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // LRN needs 3 primitives: input, result, and lrn_forward.
auto lrn_index = mkldnn_emitter->reserve_primitive_space(3);
auto lrn_index =
mkldnn_emitter->build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index); auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
functor = [&, lrn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
functor = [&, lrn_desc, lrn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_lrn_forward(lrn_desc, lrn_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index);
......
...@@ -54,10 +54,22 @@ namespace ngraph ...@@ -54,10 +54,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name()); auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_index = mkldnn_emitter->build_rnn<ngraph::op::Lstm>(node, args, out); auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward.
// It needs a new workspace.
auto lstm_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index); auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
auto functor = [&, lstm_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) { auto functor = [&, lstm_desc, lstm_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(lstm_desc, lstm_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
...@@ -51,22 +51,19 @@ namespace ngraph ...@@ -51,22 +51,19 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto max_pool_desc =
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::MaxPool>(node,
false);
size_t max_pool_index = // MaxPool needs 3 primitives: input, result, and pooling_forward.
mkldnn_emitter->build_pooling_forward(mkldnn::algorithm::pooling_max, size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(3);
input_desc,
result_desc,
window_movement_strides,
window_shape,
padding_below,
padding_above);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx, auto functor = [&, max_pool_desc, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(max_pool_desc, max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
...@@ -124,40 +121,62 @@ namespace ngraph ...@@ -124,40 +121,62 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fwd_pool_desc =
mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::MaxPoolBackprop>(
node, true);
auto bwd_pool_desc =
mkldnn_emitter->get_max_pooling_backward_desc<ngraph::op::MaxPoolBackprop>(
node);
auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // MaxPoolBackprop forward needs 4 primitives: fprop_src, diff_src, workspace,
// and pooling_forward.
size_t max_pool_index = mkldnn_emitter->build_max_pooling_backward( // It needs a new workspace.
mkldnn::algorithm::pooling_max, size_t fwd_pool_index =
fprop_src_desc, mkldnn_emitter->reserve_primitive_space(4, true /* new workspace */);
diff_dst_desc, auto& fdeps = mkldnn_emitter->get_primitive_deps(fwd_pool_index);
diff_src_desc,
mpb->get_window_movement_strides(), auto functor_fprop = [&, fwd_pool_index](CPURuntimeContext* ctx,
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
auto& fdeps = mkldnn_emitter->get_primitive_deps(max_pool_index - 1);
auto functor_fprop = [&, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[0], arg_fwd_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[0], arg_fwd_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[1], out_tensor);
cpu::mkldnn_utils::set_memory_ptr( cpu::mkldnn_utils::set_memory_ptr(
ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]); ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index - 1); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, fwd_pool_index);
}; };
auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor_bprop = [&, max_pool_index](CPURuntimeContext* ctx, // MaxPoolBackprop backward needs 4 primitives: diff_dst, workspace, diff_src,
// and pooling_backward.
// It needs a new workspace.
size_t bwd_pool_index =
mkldnn_emitter->reserve_primitive_space(4, true /* new workspace */);
auto& bdeps = mkldnn_emitter->get_primitive_deps(bwd_pool_index);
auto functor_bprop = [&, bwd_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr( cpu::mkldnn_utils::set_memory_ptr(
ctx, bdeps[1], ctx->mkldnn_workspaces[bdeps[3]]); ctx, bdeps[1], ctx->mkldnn_workspaces[bdeps[3]]);
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bwd_pool_index);
}; };
auto functor = [&, functor_fprop, functor_bprop](CPURuntimeContext* ctx, auto functor = [&,
CPUExecutionContext* ectx) { bwd_pool_desc,
fwd_pool_desc,
fprop_src_desc,
fwd_pool_index,
bwd_pool_index,
functor_fprop,
functor_bprop](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_backward(bwd_pool_desc,
fwd_pool_desc,
fprop_src_desc,
fwd_pool_index,
bwd_pool_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
functor_fprop(ctx, ectx); functor_fprop(ctx, ectx);
functor_bprop(ctx, ectx); functor_bprop(ctx, ectx);
}; };
...@@ -202,8 +221,6 @@ namespace ngraph ...@@ -202,8 +221,6 @@ namespace ngraph
throw ngraph_error("MaxPoolWithIndices isn't supported"); throw ngraph_error("MaxPoolWithIndices isn't supported");
} }
auto max_pool = static_cast<const ngraph::op::MaxPoolWithIndices*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
...@@ -211,22 +228,22 @@ namespace ngraph ...@@ -211,22 +228,22 @@ namespace ngraph
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name()); auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0); auto max_pool_desc =
auto result_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter
->get_max_pooling_with_indices_forward_desc<ngraph::op::MaxPoolWithIndices>(
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_forward( node);
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
// MaxPoolWithIndices needs 4 primitives: src, dst, workspace, and pooling_forward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx, auto functor = [&, max_pool_desc, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_forward(max_pool_desc,
max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out1_tensor);
...@@ -249,25 +266,27 @@ namespace ngraph ...@@ -249,25 +266,27 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name()); auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto mpb = static_cast<const ngraph::op::MaxPoolWithIndicesBackprop*>(node);
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 1); auto fwd_pool_desc =
auto diff_src_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter
->get_max_pooling_forward_desc<ngraph::op::MaxPoolWithIndicesBackprop>(
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_backward( node, true);
mkldnn::algorithm::pooling_max, auto bwd_pool_desc =
diff_dst_desc, mkldnn_emitter
diff_src_desc, ->get_max_pooling_backward_desc<ngraph::op::MaxPoolWithIndicesBackprop>(
mpb->get_window_movement_strides(), node);
mpb->get_window_shape(), // MaxPoolWithIndicesBackprop needs 4 primitives: diff_dst, fprop_workspace,
mpb->get_padding_below(), // diff_dst, and pooling_backward.
mpb->get_padding_above()); size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx, auto functor = [&, bwd_pool_desc, fwd_pool_desc, max_pool_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_backward(
bwd_pool_desc, fwd_pool_desc, max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg2_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -54,35 +54,26 @@ namespace ngraph ...@@ -54,35 +54,26 @@ namespace ngraph
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>( auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
dequantize->get_argument(1)); dequantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr) if (scale_const_op == nullptr)
{ {
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape()); auto scales_size = shape_size(args[1].get_shape());
size_t dequantize_index = // Dequantize needs 3 primitives: input, result, and reorder.
mkldnn_emitter->build_dequantization(node, input_desc, result_desc); size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, input_desc, result_desc, scales_size, dequantize_index]( functor = [&, input_desc, result_desc, scales_size, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration. // Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales dont change for the duration of the graph // Assumes the scales dont change for the duration of the graph
if (ctx->first_iteration) if (ctx->first_iteration)
{ {
mkldnn::primitive_attr attr;
vector<float> dyn_scales; vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor), dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size); static_cast<float*>(arg1_tensor) + scales_size);
attr.set_output_scales(0, dyn_scales); mkldnn_emitter->build_quantize_reorder(
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); input_desc, result_desc, dyn_scales, dequantize_index);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[dequantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
...@@ -92,11 +83,19 @@ namespace ngraph ...@@ -92,11 +83,19 @@ namespace ngraph
} }
else else
{ {
size_t dequantize_index = std::vector<float> scale = scale_const_op->get_vector<float>();
mkldnn_emitter->build_dequantization(node, input_desc, result_desc); std::vector<float> scales;
scales.push_back(scale[0]);
size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, dequantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { functor = [&, input_desc, result_desc, scales, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, dequantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
...@@ -243,25 +242,21 @@ namespace ngraph ...@@ -243,25 +242,21 @@ namespace ngraph
auto scale_const_op = auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1)); std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr) if (scale_const_op == nullptr)
{ {
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape()); auto scales_size = shape_size(args[1].get_shape());
// Dummy value while we wait for the actual values that are provided during // Quantize needs 3 primitives: input, result, and reorder.
// execution size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
scales.push_back(1.0f);
size_t quantize_index =
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, input_desc, result_desc, scales_size, quantize_index]( auto functor = [&, input_desc, result_desc, scales_size, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration. // Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales dont change for the duration of the graph // Assumes the scales dont change for the duration of the graph
if (ctx->first_iteration) if (ctx->first_iteration)
{ {
mkldnn::primitive_attr attr;
vector<float> dyn_scales; vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor), dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size); static_cast<float*>(arg1_tensor) + scales_size);
...@@ -271,16 +266,8 @@ namespace ngraph ...@@ -271,16 +266,8 @@ namespace ngraph
} }
// quantize across first dim (mask=2^0) if dyn_scales is a vector // quantize across first dim (mask=2^0) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 1; const int mask = scales_size == 1 ? 0 : 1;
attr.set_output_scales(mask, dyn_scales); mkldnn_emitter->build_quantize_reorder(
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); input_desc, result_desc, dyn_scales, quantize_index, mask);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[quantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
...@@ -291,12 +278,18 @@ namespace ngraph ...@@ -291,12 +278,18 @@ namespace ngraph
else else
{ {
auto scale = scale_const_op->get_vector<float>(); auto scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(1.0 / scale[0]); scales.push_back(1.0 / scale[0]);
size_t quantize_index = size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, quantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, input_desc, result_desc, scales, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, quantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
......
...@@ -35,15 +35,24 @@ namespace ngraph ...@@ -35,15 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qavg_pool_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::QuantizedAvgPool>(
node, false);
// QuantizedAvgPool needs 3 primitives: input, result, and pooling_forward.
size_t qavg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
auto functor = [&, qavg_pool_index](CPURuntimeContext* ctx, auto functor = [&, qavg_pool_desc, qavg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qavg_pool_desc, qavg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index);
......
...@@ -48,8 +48,7 @@ namespace ngraph ...@@ -48,8 +48,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>(node);
node, args, out);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node); ->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node);
...@@ -68,7 +67,7 @@ namespace ngraph ...@@ -68,7 +67,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>( mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -101,7 +100,7 @@ namespace ngraph ...@@ -101,7 +100,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionRelu>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionRelu>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>(
...@@ -119,7 +118,7 @@ namespace ngraph ...@@ -119,7 +118,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>( mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -154,7 +153,7 @@ namespace ngraph ...@@ -154,7 +153,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBias>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBias>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>(
...@@ -172,7 +171,7 @@ namespace ngraph ...@@ -172,7 +171,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -213,7 +212,7 @@ namespace ngraph ...@@ -213,7 +212,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBiasAdd>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>(
...@@ -259,7 +258,7 @@ namespace ngraph ...@@ -259,7 +258,7 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
conv_attr.set_post_ops(new_pops); conv_attr.set_post_ops(new_pops);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
...@@ -305,7 +304,7 @@ namespace ngraph ...@@ -305,7 +304,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc = mkldnn_emitter->get_convolution_forward_desc< auto conv_desc = mkldnn_emitter->get_convolution_forward_desc<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node, args, out); ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
auto conv_attr = mkldnn_emitter->get_convolution_forward_attr< auto conv_attr = mkldnn_emitter->get_convolution_forward_attr<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node); ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true); size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
...@@ -349,7 +348,7 @@ namespace ngraph ...@@ -349,7 +348,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
......
...@@ -35,16 +35,24 @@ namespace ngraph ...@@ -35,16 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qmax_pool_desc =
size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node); mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::QuantizedMaxPool>(
node, false);
// QuantizedMaxPool needs 3 primitives: input, result, and pooling_forward.
size_t qmax_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
auto functor = [&, qmax_pool_index](CPURuntimeContext* ctx, auto functor = [&, qmax_pool_desc, qmax_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qmax_pool_desc, qmax_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index);
......
...@@ -40,15 +40,17 @@ namespace ngraph ...@@ -40,15 +40,17 @@ namespace ngraph
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto relu_desc = mkldnn_emitter->get_relu_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Relu needs 3 primitives: input, result, and eltwise_forward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(3);
size_t relu_index = mkldnn_emitter->build_relu_forward(input_desc, result_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx, auto functor = [&, relu_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_forward(relu_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index);
...@@ -74,16 +76,18 @@ namespace ngraph ...@@ -74,16 +76,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bwd_desc = mkldnn_emitter->get_relu_backward_desc(node);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); auto fwd_desc = mkldnn_emitter->get_relu_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // ReluBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(4);
size_t relu_index =
mkldnn_emitter->build_relu_backward(input_desc, delta_desc, result_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bwd_desc, fwd_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_backward(bwd_desc, fwd_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_fwd_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_fwd_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -49,9 +49,22 @@ namespace ngraph ...@@ -49,9 +49,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name()); auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_index = mkldnn_emitter->build_rnn<ngraph::op::Rnn>(node, args, out); auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
// Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward.
// It needs a new workspace.
auto rnn_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index); auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
auto functor = [&, rnn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
auto functor = [&, rnn_desc, rnn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(rnn_desc, rnn_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
...@@ -42,15 +42,17 @@ namespace ngraph ...@@ -42,15 +42,17 @@ namespace ngraph
auto out_shape = out[0].get_shape(); auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto sigmoid_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, false);
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Sigmoid needs 3 primitives: input, result, and eltwise_forward.
auto sigmoid_index = mkldnn_emitter->reserve_primitive_space(3);
auto sigmoid_index = mkldnn_emitter->build_sigmoid_forward(input_desc, out_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index); auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx, auto functor = [&, sigmoid_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_forward(sigmoid_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index);
...@@ -72,17 +74,18 @@ namespace ngraph ...@@ -72,17 +74,18 @@ namespace ngraph
auto out_shape = out[0].get_shape(); auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fwd_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, true);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bwd_desc = mkldnn_emitter->get_sigmoid_backward_desc(node);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); // SigmoidBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); size_t sigmoid_index = mkldnn_emitter->reserve_primitive_space(4);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_backward(input_desc, delta_desc, out_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index); auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bwd_desc, fwd_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_backward(bwd_desc, fwd_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -84,17 +84,22 @@ namespace ngraph ...@@ -84,17 +84,22 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
// Slice needs 3 primitives: input, result, and reorder.
auto slice_index = mkldnn_emitter->build_slice( auto slice_index = mkldnn_emitter->reserve_primitive_space(3);
input_desc, result_desc, lower_bounds, out_shape);
auto& deps = mkldnn_emitter->get_primitive_deps(slice_index); auto& deps = mkldnn_emitter->get_primitive_deps(slice_index);
auto functor = [&, slice_index](CPURuntimeContext* ctx, auto functor =
CPUExecutionContext* ectx) { [&, input_desc, result_desc, lower_bounds, out_shape, slice_index](
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); if (ctx->first_iteration)
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index); {
}; mkldnn_emitter->build_slice(
input_desc, result_desc, lower_bounds, out_shape, slice_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index);
};
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
...@@ -46,23 +46,18 @@ namespace ngraph ...@@ -46,23 +46,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
if (axes.size() != 1)
{
throw ngraph_error("MKLDNN supports softmax only across single axis");
}
int softmax_axis = static_cast<int>(*(axes.begin()));
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto softmax_desc = mkldnn_emitter->get_softmax_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Softmax needs 3 primitives: input, result, and softmax_forward.
size_t softmax_index = mkldnn_emitter->reserve_primitive_space(3);
size_t softmax_index = mkldnn_emitter->build_softmax_forward(
input_desc, result_desc, softmax_axis);
auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index); auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index);
auto functor = [&, softmax_index](CPURuntimeContext* ctx, auto functor = [&, softmax_desc, softmax_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_softmax_forward(softmax_desc, softmax_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index);
......
...@@ -131,6 +131,11 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc) ...@@ -131,6 +131,11 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
return index; return index;
} }
// Creates a placeholder mkldnn memory primitive for `desc` at slot `index`.
// The data handle is left null; the real pointer is bound at run time via
// set_memory_ptr.
void MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc, size_t index)
{
    mkldnn::memory::primitive_desc mem_pd{desc, executor::global_cpu_engine};
    m_mkldnn_primitives[index] = new mkldnn::memory(mem_pd, nullptr);
}
size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales) const std::vector<float>& scales)
...@@ -149,6 +154,27 @@ size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_d ...@@ -149,6 +154,27 @@ size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_d
return primitive_index; return primitive_index;
} }
// Builds a quantizing reorder (reorder with output scales) at a
// pre-reserved primitive slot.
// deps layout: [0]=input memory, [1]=result memory.
void MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
                                           const mkldnn::memory::desc& result_desc,
                                           const std::vector<float>& scales,
                                           size_t quantize_index,
                                           const int mask)
{
    auto& deps = m_primitive_deps[quantize_index];
    const size_t src_index = deps[0];
    const size_t dst_index = deps[1];
    build_memory_primitive(input_desc, src_index);
    build_memory_primitive(result_desc, dst_index);

    // Attach the quantization scales; integer outputs are rounded to nearest.
    mkldnn::primitive_attr attr;
    attr.set_output_scales(mask, scales);
    attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);

    mkldnn::memory::primitive_desc src_pd{input_desc, executor::global_cpu_engine};
    mkldnn::memory::primitive_desc dst_pd{result_desc, executor::global_cpu_engine};
    auto reorder_pd = mkldnn::reorder::primitive_desc(src_pd, dst_pd, attr);

    m_mkldnn_primitives[quantize_index] = new mkldnn::reorder(
        reorder_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node, size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node,
const mkldnn::memory::desc& input_desc, const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
...@@ -478,6 +504,33 @@ size_t MKLDNNEmitter::build_convolution_backward_weights_bias( ...@@ -478,6 +504,33 @@ size_t MKLDNNEmitter::build_convolution_backward_weights_bias(
return conv_index; return conv_index;
} }
// Builds convolution backward-weights (with bias gradient) at a
// pre-reserved primitive slot.
// deps layout: [0]=src data, [1]=diff_dst delta,
//              [2]=diff_weights, [3]=diff_bias.
void MKLDNNEmitter::build_convolution_backward_weights_bias(
    const mkldnn::convolution_backward_weights::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    auto& deps = m_primitive_deps[conv_index];
    const size_t src_index = deps[0];
    const size_t delta_index = deps[1];
    const size_t diff_weights_index = deps[2];
    const size_t diff_bias_index = deps[3];

    build_memory_primitive(bwd_desc.data.src_desc, src_index);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, delta_index);
    build_memory_primitive(bwd_desc.data.diff_weights_desc, diff_weights_index);
    build_memory_primitive(bwd_desc.data.diff_bias_desc, diff_bias_index);

    m_mkldnn_primitives[conv_index] = new mkldnn::convolution_backward_weights(
        {bwd_desc,
         executor::global_cpu_engine,
         // Forward primitive descriptor acts as the hint for this backward pass.
         {fwd_desc, executor::global_cpu_engine}},
        *m_mkldnn_primitives[src_index],
        *m_mkldnn_primitives[delta_index],
        *m_mkldnn_primitives[diff_weights_index],
        *m_mkldnn_primitives[diff_bias_index]);
}
size_t size_t
MKLDNNEmitter::build_convolution_backward_weights(const mkldnn::memory::desc& input_desc, MKLDNNEmitter::build_convolution_backward_weights(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
...@@ -522,6 +575,28 @@ size_t ...@@ -522,6 +575,28 @@ size_t
return primitive_index; return primitive_index;
} }
// Builds convolution backward-weights (no bias) at a pre-reserved
// primitive slot.
// deps layout: [0]=src data, [1]=diff_dst delta, [2]=diff_weights.
void MKLDNNEmitter::build_convolution_backward_weights(
    const mkldnn::convolution_backward_weights::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    const size_t src_index = m_primitive_deps[conv_index][0];
    const size_t delta_index = m_primitive_deps[conv_index][1];
    const size_t diff_weights_index = m_primitive_deps[conv_index][2];
    build_memory_primitive(bwd_desc.data.src_desc, src_index);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, delta_index);
    build_memory_primitive(bwd_desc.data.diff_weights_desc, diff_weights_index);

    // The forward primitive descriptor serves as the hint required by the
    // backward-weights primitive descriptor.
    mkldnn::convolution_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::convolution_backward_weights::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[conv_index] =
        new mkldnn::convolution_backward_weights(bwd_pd,
                                                 *m_mkldnn_primitives[src_index],
                                                 *m_mkldnn_primitives[delta_index],
                                                 *m_mkldnn_primitives[diff_weights_index]);
}
size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc& weights_desc, size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
...@@ -565,6 +640,28 @@ size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc ...@@ -565,6 +640,28 @@ size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc
return primitive_index; return primitive_index;
} }
// Builds convolution backward-data at a pre-reserved primitive slot.
// deps layout: [0]=weights, [1]=diff_dst delta, [2]=diff_src result.
void MKLDNNEmitter::build_convolution_backward_data(
    const mkldnn::convolution_backward_data::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    auto& deps = m_primitive_deps[conv_index];
    const size_t weights_index = deps[0];
    const size_t delta_index = deps[1];
    const size_t result_index = deps[2];
    build_memory_primitive(bwd_desc.data.weights_desc, weights_index);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, delta_index);
    build_memory_primitive(bwd_desc.data.diff_src_desc, result_index);

    // Forward primitive descriptor acts as the hint for the backward-data pd.
    mkldnn::convolution_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::convolution_backward_data::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    // mkldnn expects the arguments in (diff_dst, weights, diff_src) order.
    m_mkldnn_primitives[conv_index] =
        new mkldnn::convolution_backward_data(bwd_pd,
                                              *m_mkldnn_primitives[delta_index],
                                              *m_mkldnn_primitives[weights_index],
                                              *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm, size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& input_desc, const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
...@@ -594,6 +691,20 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm, ...@@ -594,6 +691,20 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
return primitive_index; return primitive_index;
} }
// Builds a pooling_forward primitive at a pre-reserved slot.
// deps layout: [0]=input memory, [1]=result memory.
void MKLDNNEmitter::build_pooling_forward(const mkldnn::pooling_forward::desc& pool_desc,
                                          size_t pool_index)
{
    const size_t src_index = m_primitive_deps[pool_index][0];
    const size_t dst_index = m_primitive_deps[pool_index][1];
    build_memory_primitive(pool_desc.data.src_desc, src_index);
    build_memory_primitive(pool_desc.data.dst_desc, dst_index);

    mkldnn::pooling_forward::primitive_desc fwd_pd{pool_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[pool_index] = new mkldnn::pooling_forward(
        fwd_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm, size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
const mkldnn::memory::desc& diff_src_desc, const mkldnn::memory::desc& diff_src_desc,
...@@ -632,6 +743,24 @@ size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm ...@@ -632,6 +743,24 @@ size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm
return primitive_index; return primitive_index;
} }
// Builds a pooling_backward primitive at a pre-reserved slot.
// deps layout: [0]=diff_dst memory, [1]=diff_src memory.
void MKLDNNEmitter::build_pooling_backward(const mkldnn::pooling_backward::desc& pool_desc,
                                           const mkldnn::pooling_forward::desc& pool_fwd_desc,
                                           size_t pool_index)
{
    const size_t diff_dst_index = m_primitive_deps[pool_index][0];
    const size_t diff_src_index = m_primitive_deps[pool_index][1];
    build_memory_primitive(pool_desc.data.diff_dst_desc, diff_dst_index);
    build_memory_primitive(pool_desc.data.diff_src_desc, diff_src_index);

    // The backward pd requires the matching forward pd as a hint.
    mkldnn::pooling_forward::primitive_desc fwd_pd{pool_fwd_desc, executor::global_cpu_engine};
    mkldnn::pooling_backward::primitive_desc bwd_pd{
        pool_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[pool_index] = new mkldnn::pooling_backward(
        bwd_pd, *m_mkldnn_primitives[diff_dst_index], *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algorithm, size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& fprop_src_desc, const mkldnn::memory::desc& fprop_src_desc,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
...@@ -693,6 +822,48 @@ size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algor ...@@ -693,6 +822,48 @@ size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algor
return bwd_primitive_index; return bwd_primitive_index;
} }
// Builds the max-pooling forward/backward primitive pair at two
// pre-reserved slots. The forward pass is re-executed at run time to
// regenerate the workspace (max indices) that the backward pass consumes.
//
// Forward deps layout:  [0]=fprop src, [1]=diff_src (reused as fprop dst),
//                       [2]=workspace, [3]=workspace buffer.
// Backward deps layout: [0]=diff_dst, [1]=workspace, [2]=diff_src,
//                       [3]=workspace buffer.
void MKLDNNEmitter::build_max_pooling_backward(const mkldnn::pooling_backward::desc& bwd_pool_desc,
                                               const mkldnn::pooling_forward::desc& fwd_pool_desc,
                                               const mkldnn::memory::desc& fprop_src_desc,
                                               size_t fwd_pool_index,
                                               size_t bwd_pool_index)
{
    size_t fprop_src_index = m_primitive_deps[fwd_pool_index][0];
    build_memory_primitive(fprop_src_desc, fprop_src_index);
    size_t diff_dst_index = m_primitive_deps[bwd_pool_index][0];
    build_memory_primitive(bwd_pool_desc.data.diff_dst_desc, diff_dst_index);
    size_t diff_src_index = m_primitive_deps[fwd_pool_index][1];
    build_memory_primitive(bwd_pool_desc.data.diff_src_desc, diff_src_index);
    m_primitive_deps[bwd_pool_index][2] = diff_src_index;

    mkldnn::pooling_forward::primitive_desc fwd_pd{fwd_pool_desc, executor::global_cpu_engine};

    // BUG FIX: the workspace lives in fwd deps slot [2]; the previous code
    // read slot [1] (the diff_src slot), which rebuilt a second memory
    // primitive over diff_src_index (leaking the first) and made the
    // forward dst and workspace alias the same primitive.
    size_t ws_index = m_primitive_deps[fwd_pool_index][2];
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), ws_index);
    m_primitive_deps[bwd_pool_index][1] = ws_index;

    // Allocate workspace
    // TODO (jbobba): Might need to align memory
    auto ws = std::unique_ptr<MKLDNNWorkspace>(
        new MKLDNNWorkspace(fwd_pd.workspace_primitive_desc().get_size()));
    auto ws_buf_index = insert_workspace(ws);
    m_primitive_deps[fwd_pool_index][3] = ws_buf_index;
    m_primitive_deps[bwd_pool_index][3] = ws_buf_index;

    m_mkldnn_primitives[fwd_pool_index] = new mkldnn::pooling_forward(
        fwd_pd,
        *m_mkldnn_primitives[fprop_src_index],
        *m_mkldnn_primitives
            [diff_src_index], // HACK - Uses diff_src buffer. Safe since diff_src > fprop_dst
        *m_mkldnn_primitives[ws_index]);

    m_mkldnn_primitives[bwd_pool_index] =
        new mkldnn::pooling_backward({bwd_pool_desc, executor::global_cpu_engine, fwd_pd},
                                     *m_mkldnn_primitives[diff_dst_index],
                                     *m_mkldnn_primitives[ws_index],
                                     *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm, size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& src_desc, const mkldnn::memory::desc& src_desc,
const mkldnn::memory::desc& dst_desc, const mkldnn::memory::desc& dst_desc,
...@@ -728,6 +899,26 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm p ...@@ -728,6 +899,26 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm p
return fwd_primitive_index; return fwd_primitive_index;
} }
// Builds a max-pooling-with-indices forward primitive at a pre-reserved slot.
// deps layout: [0]=src memory, [1]=dst memory, [2]=workspace (indices).
void MKLDNNEmitter::build_max_pooling_with_indices_forward(
    const mkldnn::pooling_forward::desc& max_pool_desc, size_t max_pool_index)
{
    auto& deps = m_primitive_deps[max_pool_index];
    const size_t src_index = deps[0];
    const size_t dst_index = deps[1];
    const size_t ws_index = deps[2];

    build_memory_primitive(max_pool_desc.data.src_desc, src_index);
    build_memory_primitive(max_pool_desc.data.dst_desc, dst_index);

    // The primitive descriptor must exist first: the workspace memory
    // descriptor is queried from it.
    mkldnn::pooling_forward::primitive_desc fwd_pd{max_pool_desc, executor::global_cpu_engine};
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), ws_index);

    m_mkldnn_primitives[max_pool_index] =
        new mkldnn::pooling_forward(fwd_pd,
                                    *m_mkldnn_primitives[src_index],
                                    *m_mkldnn_primitives[dst_index],
                                    *m_mkldnn_primitives[ws_index]);
}
size_t MKLDNNEmitter::build_max_pooling_with_indices_backward( size_t MKLDNNEmitter::build_max_pooling_with_indices_backward(
mkldnn::algorithm pooling_algorithm, mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
...@@ -773,6 +964,28 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_backward( ...@@ -773,6 +964,28 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_backward(
return bwd_primitive_index; return bwd_primitive_index;
} }
// Builds a max-pooling-with-indices backward primitive at a pre-reserved slot.
// deps layout: [0]=diff_dst memory, [1]=fprop workspace (indices),
//              [2]=diff_src memory.
void MKLDNNEmitter::build_max_pooling_with_indices_backward(
    const mkldnn::pooling_backward::desc& bwd_pool_desc,
    const mkldnn::pooling_forward::desc& fwd_pool_desc,
    size_t max_pool_index)
{
    auto& deps = m_primitive_deps[max_pool_index];
    const size_t diff_dst_index = deps[0];
    const size_t fprop_ws_index = deps[1];
    const size_t diff_src_index = deps[2];

    build_memory_primitive(bwd_pool_desc.data.diff_dst_desc, diff_dst_index);
    build_memory_primitive(bwd_pool_desc.data.diff_src_desc, diff_src_index);

    // The forward pd supplies both the backward hint and the workspace layout.
    mkldnn::pooling_forward::primitive_desc fwd_pd{fwd_pool_desc, executor::global_cpu_engine};
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), fprop_ws_index);

    m_mkldnn_primitives[max_pool_index] =
        new mkldnn::pooling_backward({bwd_pool_desc, executor::global_cpu_engine, fwd_pd},
                                     *m_mkldnn_primitives[diff_dst_index],
                                     *m_mkldnn_primitives[fprop_ws_index],
                                     *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
{ {
...@@ -794,6 +1007,19 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc, ...@@ -794,6 +1007,19 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
// Builds a layout-conversion (reorder) primitive from input_desc to
// result_desc in the slot reserved at reorder_index.
// Dep slots: [0] input, [1] result.
void MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
                                  const mkldnn::memory::desc& result_desc,
                                  size_t reorder_index)
{
    const size_t src_index = m_primitive_deps[reorder_index][0];
    const size_t dst_index = m_primitive_deps[reorder_index][1];

    build_memory_primitive(input_desc, src_index);
    build_memory_primitive(result_desc, dst_index);

    m_mkldnn_primitives[reorder_index] =
        new mkldnn::reorder(*m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha, float alpha,
...@@ -820,6 +1046,39 @@ size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc, ...@@ -820,6 +1046,39 @@ size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
// Creates the LRN forward descriptor for `node`, pulling alpha/beta/bias and
// the window size (nsize) from the LRN op.
mkldnn::lrn_forward::desc MKLDNNEmitter::get_lrn_forward_desc(const ngraph::Node* node)
{
    auto lrn = static_cast<const ngraph::op::LRN*>(node);
    return mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring,
                                     mkldnn::algorithm::lrn_across_channels,
                                     mkldnn_utils::get_input_mkldnn_md(node, 0),
                                     static_cast<int>(lrn->get_nsize()),
                                     static_cast<float>(lrn->get_alpha()),
                                     static_cast<float>(lrn->get_beta()),
                                     static_cast<float>(lrn->get_bias()));
}
// Builds the LRN forward primitive in the slot reserved at lrn_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_lrn_forward(const mkldnn::lrn_forward::desc& lrn_desc, size_t lrn_index)
{
    const size_t src_index = m_primitive_deps[lrn_index][0];
    const size_t dst_index = m_primitive_deps[lrn_index][1];

    build_memory_primitive(lrn_desc.data.data_desc, src_index);
    build_memory_primitive(lrn_desc.data.data_desc, dst_index);

    mkldnn::lrn_forward::primitive_desc pd{lrn_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[lrn_index] = new mkldnn::lrn_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
{ {
...@@ -838,6 +1097,30 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc, ...@@ -838,6 +1097,30 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
// Creates the ReLU forward descriptor for `node` (negative slope fixed at 0).
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_relu_forward_desc(const ngraph::Node* node)
{
    return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward,
                                         mkldnn::algorithm::eltwise_relu,
                                         mkldnn_utils::get_input_mkldnn_md(node, 0),
                                         0.0f);
}
// Builds the ReLU forward primitive in the slot reserved at relu_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_relu_forward(const mkldnn::eltwise_forward::desc& relu_desc,
                                       size_t relu_index)
{
    const size_t src_index = m_primitive_deps[relu_index][0];
    const size_t dst_index = m_primitive_deps[relu_index][1];

    build_memory_primitive(relu_desc.data.data_desc, src_index);
    build_memory_primitive(relu_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc pd{relu_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[relu_index] = new mkldnn::eltwise_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
...@@ -868,6 +1151,41 @@ size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc ...@@ -868,6 +1151,41 @@ size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc
return primitive_index; return primitive_index;
} }
// Creates the ReLU backward descriptor for `node`.
// Input 0 supplies the forward data layout; output 0 supplies the diff layout.
// NOTE(review): the output md is passed first (as diff_data_desc) and the
// input md second (as data_desc), per the mkldnn
// eltwise_backward::desc(alg, diff_desc, data_desc, alpha) signature.
mkldnn::eltwise_backward::desc MKLDNNEmitter::get_relu_backward_desc(const ngraph::Node* node)
{
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
    const float negative_slope = 0.0f;
    return mkldnn::eltwise_backward::desc(
        mkldnn::algorithm::eltwise_relu, result_desc, input_desc, negative_slope);
}
// Builds the ReLU backward primitive in the slot reserved at relu_index.
// Dep slots: [0] forward input, [1] delta, [2] result.
void MKLDNNEmitter::build_relu_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
                                        const mkldnn::eltwise_forward::desc& fwd_desc,
                                        size_t relu_index)
{
    const size_t src_index = m_primitive_deps[relu_index][0];
    const size_t delta_index = m_primitive_deps[relu_index][1];
    const size_t dst_index = m_primitive_deps[relu_index][2];

    build_memory_primitive(bwd_desc.data.data_desc, src_index);
    build_memory_primitive(bwd_desc.data.diff_data_desc, delta_index);
    build_memory_primitive(bwd_desc.data.data_desc, dst_index);

    // The backward primitive desc requires the forward pd as a hint.
    mkldnn::eltwise_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::eltwise_backward::primitive_desc bwd_pd{bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[relu_index] =
        new mkldnn::eltwise_backward(bwd_pd,
                                     *m_mkldnn_primitives[src_index],
                                     *m_mkldnn_primitives[delta_index],
                                     *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
{ {
...@@ -888,6 +1206,40 @@ size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_de ...@@ -888,6 +1206,40 @@ size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_de
return primitive_index; return primitive_index;
} }
// Creates the sigmoid (eltwise_logistic) forward descriptor for `node`.
// backward_op == true means the desc will be used as the forward hint for a
// sigmoid-backward primitive; otherwise it is for a standalone forward op.
// NOTE(review): mkldnn defines prop_kind::forward as an alias of
// forward_training, so both branches request an equivalent propagation kind --
// confirm against the mkldnn version in use before relying on a difference.
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_sigmoid_forward_desc(const ngraph::Node* node,
                                                                      bool backward_op)
{
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    if (backward_op)
    {
        return mkldnn::eltwise_forward::desc(
            mkldnn::prop_kind::forward, mkldnn::algorithm::eltwise_logistic, input_desc, 0, 0);
    }
    else
    {
        return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training,
                                             mkldnn::algorithm::eltwise_logistic,
                                             input_desc,
                                             0,
                                             0);
    }
}
// Builds the sigmoid forward primitive in the slot reserved at sigmoid_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_sigmoid_forward(const mkldnn::eltwise_forward::desc& sigmoid_desc,
                                          size_t sigmoid_index)
{
    const size_t src_index = m_primitive_deps[sigmoid_index][0];
    const size_t dst_index = m_primitive_deps[sigmoid_index][1];

    build_memory_primitive(sigmoid_desc.data.data_desc, src_index);
    build_memory_primitive(sigmoid_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc pd{sigmoid_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[sigmoid_index] = new mkldnn::eltwise_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc) const mkldnn::memory::desc& result_desc)
...@@ -914,6 +1266,37 @@ size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_d ...@@ -914,6 +1266,37 @@ size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_d
return primitive_index; return primitive_index;
} }
// Creates the sigmoid backward descriptor for `node`.
// Input 0 is the forward data; input 1 is the incoming delta (diff desc).
mkldnn::eltwise_backward::desc MKLDNNEmitter::get_sigmoid_backward_desc(const ngraph::Node* node)
{
    auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto diff_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    return mkldnn::eltwise_backward::desc(
        mkldnn::algorithm::eltwise_logistic, diff_desc, data_desc, 0, 0);
}
// Builds the sigmoid backward primitive in the slot reserved at sigmoid_index.
// Dep slots: [0] forward input, [1] delta, [2] result.
void MKLDNNEmitter::build_sigmoid_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
                                           const mkldnn::eltwise_forward::desc& fwd_desc,
                                           size_t sigmoid_index)
{
    const size_t src_index = m_primitive_deps[sigmoid_index][0];
    const size_t delta_index = m_primitive_deps[sigmoid_index][1];
    const size_t dst_index = m_primitive_deps[sigmoid_index][2];

    build_memory_primitive(bwd_desc.data.data_desc, src_index);
    build_memory_primitive(bwd_desc.data.diff_data_desc, delta_index);
    build_memory_primitive(bwd_desc.data.data_desc, dst_index);

    // The backward primitive desc needs the forward pd as a hint.
    mkldnn::eltwise_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::eltwise_backward::primitive_desc bwd_pd{bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[sigmoid_index] =
        new mkldnn::eltwise_backward(bwd_pd,
                                     *m_mkldnn_primitives[src_index],
                                     *m_mkldnn_primitives[delta_index],
                                     *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_elementwise_add( size_t MKLDNNEmitter::build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc, const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc, const mkldnn::memory::desc& input1_data_desc,
...@@ -942,6 +1325,45 @@ size_t MKLDNNEmitter::build_elementwise_add( ...@@ -942,6 +1325,45 @@ size_t MKLDNNEmitter::build_elementwise_add(
return add_index; return add_index;
} }
// Creates the elementwise-add (sum) primitive descriptor for `node`.
// Both inputs are combined with unit scale.
mkldnn::sum::primitive_desc MKLDNNEmitter::get_elementwise_add_desc(const ngraph::Node* node)
{
    const std::vector<float> scales(2, 1.0f);
    std::vector<mkldnn::memory::primitive_desc> input_pds;
    for (size_t i = 0; i < 2; i++)
    {
        input_pds.emplace_back(mkldnn_utils::get_input_mkldnn_md(node, i),
                               ngraph::runtime::cpu::executor::global_cpu_engine);
    }
    return mkldnn::sum::primitive_desc(
        mkldnn_utils::get_output_mkldnn_md(node, 0), scales, input_pds);
}
// Builds the elementwise sum primitive in the slot reserved at add_index.
// Dep slots: [0] input0, [1] input1, [2] result.
// NOTE(review): the dst memory desc from sum_pd is used to build all three
// memory primitives, including both inputs -- this assumes the inputs share
// the output layout; confirm against get_elementwise_add_desc.
void MKLDNNEmitter::build_elementwise_add(const mkldnn::sum::primitive_desc& sum_pd,
                                          size_t add_index)
{
    std::vector<mkldnn::memory::primitive::at> inputs_primitive;
    size_t input0_data_index = m_primitive_deps[add_index][0];
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), input0_data_index);
    size_t input1_data_index = m_primitive_deps[add_index][1];
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), input1_data_index);
    size_t result_index = m_primitive_deps[add_index][2];
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), result_index);
    inputs_primitive.push_back(*m_mkldnn_primitives[input0_data_index]);
    inputs_primitive.push_back(*m_mkldnn_primitives[input1_data_index]);
    // sum primitive
    m_mkldnn_primitives[add_index] =
        new mkldnn::sum(sum_pd, inputs_primitive, *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc, const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
...@@ -1002,6 +1424,60 @@ size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_ ...@@ -1002,6 +1424,60 @@ size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_
} }
} }
// Builds the batchnorm forward primitive in the slot reserved at
// batchnorm_index. The dep-slot layout differs between the two modes:
//   training without global stats: [0] input, [1] weights, [2] result,
//                                  [3] mean (out), [4] variance (out)
//   otherwise:                     [0] input, [1] mean (in), [2] variance (in),
//                                  [3] weights, [4] result
// pops carries optional fused post-ops attached via primitive_attr.
void MKLDNNEmitter::build_batchnorm_forward(
    const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
    const mkldnn::memory::desc& weights_desc,
    bool bn_training_flag,
    size_t batchnorm_index,
    const mkldnn::post_ops& pops)
{
    size_t input_index = m_primitive_deps[batchnorm_index][0];
    build_memory_primitive(batchnorm_desc.data.data_desc, input_index);
    mkldnn::primitive_attr bn_attr;
    bn_attr.set_post_ops(pops);
    // Bit 0 of the batchnorm flags is assumed to be mkldnn's use_global_stats
    // flag (mkldnn_use_global_stats == 0x1U) -- confirm against the mkldnn
    // version in use.
    auto use_global_stats = batchnorm_desc.data.flags & 0x1U;
    if (bn_training_flag && !use_global_stats)
    {
        size_t weights_index = m_primitive_deps[batchnorm_index][1];
        build_memory_primitive(weights_desc, weights_index);
        size_t result_index = m_primitive_deps[batchnorm_index][2];
        build_memory_primitive(batchnorm_desc.data.data_desc, result_index);
        size_t mean_index = m_primitive_deps[batchnorm_index][3];
        build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
        size_t variance_index = m_primitive_deps[batchnorm_index][4];
        build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);
        // Training flavor: mean and variance are produced by the primitive
        // (passed after the dst memory).
        m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_forward(
            {batchnorm_desc, bn_attr, executor::global_cpu_engine},
            mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
            static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]),
            *m_mkldnn_primitives[mean_index],
            *m_mkldnn_primitives[variance_index]);
    }
    else
    {
        size_t weights_index = m_primitive_deps[batchnorm_index][3];
        build_memory_primitive(weights_desc, weights_index);
        size_t result_index = m_primitive_deps[batchnorm_index][4];
        build_memory_primitive(batchnorm_desc.data.data_desc, result_index);
        size_t mean_index = m_primitive_deps[batchnorm_index][1];
        build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
        size_t variance_index = m_primitive_deps[batchnorm_index][2];
        build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);
        // Inference / global-stats flavor: mean and variance are consumed as
        // inputs (wrapped in primitive::at).
        m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_forward(
            {batchnorm_desc, bn_attr, executor::global_cpu_engine},
            mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[mean_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[variance_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
            static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]));
    }
}
size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weights_desc, size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc, const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc, const mkldnn::memory::desc& mean_desc,
...@@ -1049,6 +1525,62 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh ...@@ -1049,6 +1525,62 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh
return batchnorm_index; return batchnorm_index;
} }
// Creates the batchnorm backward descriptor for a BatchNormTrainingBackprop
// node. Input 2 is the forward input data; input 5 is the incoming delta,
// which is passed as the diff_data_desc of the backward descriptor.
mkldnn::batch_normalization_backward::desc
    MKLDNNEmitter::get_batchnorm_backward_desc(const ngraph::Node* node)
{
    const ngraph::op::BatchNormTrainingBackprop* batchnorm =
        static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
    auto eps = batchnorm->get_eps_value();
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
    auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
    return mkldnn::batch_normalization_backward::desc(
        mkldnn::prop_kind::backward,
        delta_desc,
        input_desc,
        eps,
        mkldnn::batch_normalization_flag::use_scale_shift);
}
// Builds the batchnorm backward primitive in the slot reserved at
// batchnorm_index.
// Dep slots: [0] weights, [1] input, [2] mean, [3] variance, [4] delta,
//            [5] diff-input (out), [6] diff-weights (out).
void MKLDNNEmitter::build_batchnorm_backward(
    const mkldnn::batch_normalization_backward::desc& batchnorm_desc,
    const mkldnn::memory::desc& weights_desc,
    const mkldnn::memory::desc& dweights_desc,
    size_t batchnorm_index)
{
    size_t weights_index = m_primitive_deps[batchnorm_index][0];
    build_memory_primitive(weights_desc, weights_index);
    size_t input_index = m_primitive_deps[batchnorm_index][1];
    build_memory_primitive(batchnorm_desc.data.data_desc, input_index);
    size_t mean_index = m_primitive_deps[batchnorm_index][2];
    build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
    size_t variance_index = m_primitive_deps[batchnorm_index][3];
    build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);
    size_t delta_index = m_primitive_deps[batchnorm_index][4];
    build_memory_primitive(batchnorm_desc.data.diff_data_desc, delta_index);
    size_t dinput_index = m_primitive_deps[batchnorm_index][5];
    build_memory_primitive(batchnorm_desc.data.data_desc, dinput_index);
    size_t dweights_index = m_primitive_deps[batchnorm_index][6];
    build_memory_primitive(dweights_desc, dweights_index);
    // The outer brace-init builds the backward primitive desc; the inner one
    // constructs a forward-training primitive desc (same data desc, epsilon,
    // use_scale_shift) that mkldnn requires as a hint.
    m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_backward(
        {batchnorm_desc,
         executor::global_cpu_engine,
         {{mkldnn::prop_kind::forward_training,
           batchnorm_desc.data.data_desc,
           static_cast<double>(batchnorm_desc.data.batch_norm_epsilon),
           mkldnn::batch_normalization_flag::use_scale_shift},
          executor::global_cpu_engine}},
        *m_mkldnn_primitives[input_index],
        *m_mkldnn_primitives[mean_index],
        *m_mkldnn_primitives[variance_index],
        *m_mkldnn_primitives[delta_index],
        *m_mkldnn_primitives[weights_index],
        *m_mkldnn_primitives[dinput_index],
        *m_mkldnn_primitives[dweights_index]);
}
size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_desc, size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc, const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc, const mkldnn::memory::desc& weights_layer_desc,
...@@ -1085,6 +1617,7 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de ...@@ -1085,6 +1617,7 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de
build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc()); build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc());
auto workspace = std::unique_ptr<MKLDNNWorkspace>( auto workspace = std::unique_ptr<MKLDNNWorkspace>(
new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size())); new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
auto workspace_buf_index = insert_workspace(workspace); auto workspace_buf_index = insert_workspace(workspace);
size_t rnn_index = insert_primitive(new mkldnn::rnn_forward( size_t rnn_index = insert_primitive(new mkldnn::rnn_forward(
...@@ -1110,6 +1643,44 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de ...@@ -1110,6 +1643,44 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de
return rnn_index; return rnn_index;
} }
// Builds the RNN forward primitive in the slot reserved at rnn_index.
// Dep slots: [0] src_layer, [1] src_iter, [2] weights_layer, [3] weights_iter,
//            [4] bias, [5] dst_layer, [6] dst_iter, [7] workspace memory.
// Slot [8] is overwritten with the index of the freshly allocated workspace
// buffer (an index into the workspace table, not a primitive index).
void MKLDNNEmitter::build_rnn_forward(const mkldnn::rnn_forward::desc& rnn_desc, size_t rnn_index)
{
    size_t src_layer_index = m_primitive_deps[rnn_index][0];
    build_memory_primitive(rnn_desc.data.src_layer_desc, src_layer_index);
    size_t src_iter_index = m_primitive_deps[rnn_index][1];
    build_memory_primitive(rnn_desc.data.src_iter_desc, src_iter_index);
    size_t weights_layer_index = m_primitive_deps[rnn_index][2];
    build_memory_primitive(rnn_desc.data.weights_layer_desc, weights_layer_index);
    size_t weights_iter_index = m_primitive_deps[rnn_index][3];
    build_memory_primitive(rnn_desc.data.weights_iter_desc, weights_iter_index);
    size_t bias_index = m_primitive_deps[rnn_index][4];
    build_memory_primitive(rnn_desc.data.bias_desc, bias_index);
    size_t dst_layer_index = m_primitive_deps[rnn_index][5];
    build_memory_primitive(rnn_desc.data.dst_layer_desc, dst_layer_index);
    size_t dst_iter_index = m_primitive_deps[rnn_index][6];
    build_memory_primitive(rnn_desc.data.dst_iter_desc, dst_iter_index);
    auto rnn_layer_prim_desc =
        mkldnn::rnn_forward::primitive_desc(rnn_desc, executor::global_cpu_engine);
    // The workspace layout and size are only known from the primitive desc.
    size_t workspace_index = m_primitive_deps[rnn_index][7];
    build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc(), workspace_index);
    auto workspace = std::unique_ptr<MKLDNNWorkspace>(
        new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
    auto workspace_buf_index = insert_workspace(workspace);
    m_primitive_deps[rnn_index][8] = workspace_buf_index;
    m_mkldnn_primitives[rnn_index] =
        new mkldnn::rnn_forward(rnn_layer_prim_desc,
                                mkldnn::primitive::at(*m_mkldnn_primitives[src_layer_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[src_iter_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[weights_layer_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[weights_iter_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[bias_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_layer_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_iter_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[workspace_index]));
}
size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc, size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const size_t concat_dim) const size_t concat_dim)
...@@ -1148,6 +1719,53 @@ size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inpu ...@@ -1148,6 +1719,53 @@ size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inpu
return concat_index; return concat_index;
} }
// Creates the concat primitive descriptor for `node`, concatenating its
// `nargs` inputs along the op's concatenation axis.
mkldnn::concat::primitive_desc MKLDNNEmitter::get_concat_desc(const ngraph::Node* node,
                                                              size_t nargs)
{
    auto concat = static_cast<const ngraph::op::Concat*>(node);

    std::vector<mkldnn::memory::primitive_desc> input_pds;
    for (size_t i = 0; i < nargs; i++)
    {
        input_pds.emplace_back(mkldnn_utils::get_input_mkldnn_md(node, i),
                               runtime::cpu::executor::global_cpu_engine);
    }

    return mkldnn::concat::primitive_desc(
        mkldnn_utils::get_output_mkldnn_md(node, 0),
        static_cast<int>(concat->get_concatenation_axis()),
        input_pds);
}
// Builds the concat primitive in the slot reserved at concat_index.
// Dep slots: [0..n-1] the n inputs, [n] the result.
// Fix: the original also populated a local `inputs_pd` vector that was never
// read -- dead work removed (the commit itself aimed to drop unused variables).
void MKLDNNEmitter::build_concat(const mkldnn::concat::primitive_desc& concat_pd,
                                 const std::vector<mkldnn::memory::desc>& inputs_data_desc,
                                 size_t concat_index)
{
    std::vector<mkldnn::memory::primitive::at> inputs_primitive;
    for (size_t i = 0; i < inputs_data_desc.size(); i++)
    {
        size_t inputs_data_index = m_primitive_deps[concat_index][i];
        build_memory_primitive(inputs_data_desc[i], inputs_data_index);
        inputs_primitive.push_back(*m_mkldnn_primitives[inputs_data_index]);
    }
    size_t result_index = m_primitive_deps[concat_index][inputs_data_desc.size()];
    build_memory_primitive(concat_pd.dst_primitive_desc().desc(), result_index);
    // concat primitive
    m_mkldnn_primitives[concat_index] =
        new mkldnn::concat(concat_pd, inputs_primitive, *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds, const ngraph::Coordinate& lower_bounds,
...@@ -1179,6 +1797,35 @@ size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc, ...@@ -1179,6 +1797,35 @@ size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
return reorder_index; return reorder_index;
} }
// Builds a slice as an MKLDNN reorder from a view (sub-tensor) of the input
// into the result buffer, in the slot reserved at slice_index.
// Dep slots: [0] input, [1] result.
// Fix: removed the unused local `in_out_index` present in the original.
void MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
                                const mkldnn::memory::desc& result_desc,
                                const ngraph::Coordinate& lower_bounds,
                                const ngraph::Shape& result_shape,
                                size_t slice_index)
{
    mkldnn::memory::primitive_desc input_pd =
        mkldnn::memory::primitive_desc(input_desc, runtime::cpu::executor::global_cpu_engine);
    size_t input_index = m_primitive_deps[slice_index][0];
    build_memory_primitive(input_desc, input_index);

    // Describe the sliced region as a view of the input: result_shape elements
    // starting at lower_bounds.
    auto dims = mkldnn::memory::dims(result_shape.begin(), result_shape.end());
    auto offsets = mkldnn::memory::dims(lower_bounds.begin(), lower_bounds.end());
    auto view_pd = mkldnn::view::primitive_desc(input_pd, dims, offsets).dst_primitive_desc();

    mkldnn::memory::primitive_desc result_pd =
        mkldnn::memory::primitive_desc(result_desc, runtime::cpu::executor::global_cpu_engine);
    size_t result_index = m_primitive_deps[slice_index][1];
    build_memory_primitive(result_desc, result_index);

    // reorder primitive descriptor
    mkldnn::reorder::primitive_desc reorder_pd =
        mkldnn::reorder::primitive_desc(view_pd, result_pd);
    // reorder primitive
    m_mkldnn_primitives[slice_index] = new mkldnn::reorder(
        reorder_pd, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
int softmax_axis) int softmax_axis)
...@@ -1196,6 +1843,37 @@ size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_de ...@@ -1196,6 +1843,37 @@ size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_de
return primitive_index; return primitive_index;
} }
// Creates the softmax forward descriptor for `node`.
// Throws if the Softmax op reduces over more than one axis, since MKLDNN
// only supports a single softmax axis.
mkldnn::softmax_forward::desc MKLDNNEmitter::get_softmax_forward_desc(const ngraph::Node* node)
{
    auto softmax = static_cast<const ngraph::op::Softmax*>(node);
    auto axes = softmax->get_axes();
    if (axes.size() != 1)
    {
        throw ngraph_error("MKLDNN supports softmax only across single axis");
    }
    return mkldnn::softmax_forward::desc(mkldnn::prop_kind::forward_scoring,
                                         mkldnn_utils::get_input_mkldnn_md(node, 0),
                                         static_cast<int>(*axes.begin()));
}
// Builds the softmax forward primitive in the slot reserved at softmax_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_softmax_forward(const mkldnn::softmax_forward::desc& softmax_desc,
                                          size_t softmax_index)
{
    const size_t src_index = m_primitive_deps[softmax_index][0];
    const size_t dst_index = m_primitive_deps[softmax_index][1];

    build_memory_primitive(softmax_desc.data.data_desc, src_index);
    build_memory_primitive(softmax_desc.data.data_desc, dst_index);

    mkldnn::softmax_forward::primitive_desc pd{softmax_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[softmax_index] = new mkldnn::softmax_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha) float alpha)
...@@ -1217,6 +1895,33 @@ size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc, ...@@ -1217,6 +1895,33 @@ size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
// Creates the leaky-ReLU forward descriptor for `node`; alpha is taken from
// the LeakyRelu op.
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_leaky_relu_desc(const ngraph::Node* node)
{
    return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training,
                                         mkldnn::algorithm::eltwise_relu,
                                         mkldnn_utils::get_input_mkldnn_md(node, 0),
                                         static_cast<const op::LeakyRelu*>(node)->get_alpha(),
                                         0.0f);
}
// Builds the leaky-ReLU primitive in the slot reserved at leaky_relu_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_leaky_relu(const mkldnn::eltwise_forward::desc& leaky_relu_desc,
                                     size_t leaky_relu_index)
{
    const size_t src_index = m_primitive_deps[leaky_relu_index][0];
    const size_t dst_index = m_primitive_deps[leaky_relu_index][1];

    build_memory_primitive(leaky_relu_desc.data.data_desc, src_index);
    build_memory_primitive(leaky_relu_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc pd{leaky_relu_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[leaky_relu_index] = new mkldnn::eltwise_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha) float alpha)
...@@ -1238,6 +1943,33 @@ size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc, ...@@ -1238,6 +1943,33 @@ size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
// Creates the bounded-ReLU forward descriptor for `node`; alpha is taken from
// the BoundedRelu op.
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_bounded_relu_desc(const ngraph::Node* node)
{
    return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training,
                                         mkldnn::algorithm::eltwise_bounded_relu,
                                         mkldnn_utils::get_input_mkldnn_md(node, 0),
                                         static_cast<const op::BoundedRelu*>(node)->get_alpha(),
                                         0.0f);
}
// Builds the bounded-ReLU primitive in the slot reserved at bounded_relu_index.
// Dep slots: [0] input, [1] result (both share the same memory desc).
void MKLDNNEmitter::build_bounded_relu(const mkldnn::eltwise_forward::desc& bounded_relu_desc,
                                       size_t bounded_relu_index)
{
    const size_t src_index = m_primitive_deps[bounded_relu_index][0];
    const size_t dst_index = m_primitive_deps[bounded_relu_index][1];

    build_memory_primitive(bounded_relu_desc.data.data_desc, src_index);
    build_memory_primitive(bounded_relu_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc pd{bounded_relu_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[bounded_relu_index] = new mkldnn::eltwise_forward(
        pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::convolution_forward_init(bool with_bias) size_t MKLDNNEmitter::convolution_forward_init(bool with_bias)
{ {
size_t size = m_mkldnn_primitives.size(); size_t size = m_mkldnn_primitives.size();
...@@ -1255,3 +1987,18 @@ size_t MKLDNNEmitter::convolution_forward_init(bool with_bias) ...@@ -1255,3 +1987,18 @@ size_t MKLDNNEmitter::convolution_forward_init(bool with_bias)
} }
return m_mkldnn_primitives.size() - 1; return m_mkldnn_primitives.size() - 1;
} }
// Reserves `count` consecutive null slots in m_mkldnn_primitives for an op.
// The last slot will hold the op's primitive; the preceding count-1 slots are
// its dependencies (memory primitives), whose indices are recorded in
// m_primitive_deps. When new_workspace is true an extra placeholder (0) is
// appended, to be overwritten later with a workspace-buffer index.
// Returns the index of the op's primitive slot.
// Fixes vs. original: the loop used a signed `int` index compared against the
// unsigned `count - 1` (which also underflows for count == 0); the guarded
// `i + 1 < count` form is safe for all counts, and the repeated
// `m_mkldnn_primitives.size() - 1` lookups are hoisted.
size_t MKLDNNEmitter::reserve_primitive_space(size_t count, bool new_workspace)
{
    const size_t first = m_mkldnn_primitives.size();
    m_mkldnn_primitives.resize(first + count, nullptr);
    const size_t prim_index = first + count - 1;
    for (size_t i = 0; i + 1 < count; i++)
    {
        m_primitive_deps[prim_index].push_back(first + i);
    }
    if (new_workspace)
    {
        m_primitive_deps[prim_index].push_back(0);
    }
    return prim_index;
}
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
// For direct execution, we reserve space for primitives and then create those primitives
// the first time the functor is called. This could be extended to create primitives when
// shapes change. Different ops need different numbers of primitives.
#pragma once #pragma once
#include <memory> #include <memory>
...@@ -25,11 +29,18 @@ ...@@ -25,11 +29,18 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp" #include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp" #include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp" #include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp" #include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
...@@ -37,6 +48,9 @@ ...@@ -37,6 +48,9 @@
#include "ngraph/runtime/cpu/op/conv_add.hpp" #include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/leaky_relu.hpp"
#include "ngraph/runtime/cpu/op/rnn_utils.hpp" #include "ngraph/runtime/cpu/op/rnn_utils.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
#include "ngraph/strides.hpp" #include "ngraph/strides.hpp"
...@@ -75,6 +89,9 @@ namespace ngraph ...@@ -75,6 +89,9 @@ namespace ngraph
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const; const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
const std::vector<char*>& get_mkldnn_workspaces(); const std::vector<char*>& get_mkldnn_workspaces();
// reserve the space for primitives for each op, different op requires different number of primitives.
// some ops require a new workspace.
size_t reserve_primitive_space(size_t count, bool new_workspace = false);
size_t insert_primitive(mkldnn::primitive* primitive); size_t insert_primitive(mkldnn::primitive* primitive);
size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace); size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace);
const std::vector<size_t>& get_primitive_deps(size_t index) const; const std::vector<size_t>& get_primitive_deps(size_t index) const;
...@@ -90,6 +107,7 @@ namespace ngraph ...@@ -90,6 +107,7 @@ namespace ngraph
const mkldnn::memory::dims& strides, const mkldnn::memory::dims& strides,
mkldnn::memory::data_type dtype) const; mkldnn::memory::data_type dtype) const;
size_t build_memory_primitive(const mkldnn::memory::desc& desc); size_t build_memory_primitive(const mkldnn::memory::desc& desc);
void build_memory_primitive(const mkldnn::memory::desc& desc, size_t index);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc, size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc, const mkldnn::memory::desc& weights_desc,
...@@ -173,9 +191,13 @@ namespace ngraph ...@@ -173,9 +191,13 @@ namespace ngraph
// MKLDNN relies on named formats for kernel selection // MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw) if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw; weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw) if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw; weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
...@@ -284,6 +306,11 @@ namespace ngraph ...@@ -284,6 +306,11 @@ namespace ngraph
const ngraph::CoordinateDiff& padding_below, const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above); const ngraph::CoordinateDiff& padding_above);
void build_convolution_backward_weights(
const mkldnn::convolution_backward_weights::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
size_t build_convolution_backward_data(const mkldnn::memory::desc& weights_desc, size_t build_convolution_backward_data(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
...@@ -291,6 +318,12 @@ namespace ngraph ...@@ -291,6 +318,12 @@ namespace ngraph
const ngraph::Strides& dilation_strides, const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below, const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above); const ngraph::CoordinateDiff& padding_above);
void build_convolution_backward_data(
const mkldnn::convolution_backward_data::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
/** /**
* Convolution + bias backprop for weights and bias * Convolution + bias backprop for weights and bias
*/ */
...@@ -304,6 +337,11 @@ namespace ngraph ...@@ -304,6 +337,11 @@ namespace ngraph
const ngraph::CoordinateDiff& ng_padding_below, const ngraph::CoordinateDiff& ng_padding_below,
const ngraph::CoordinateDiff& ng_padding_above); const ngraph::CoordinateDiff& ng_padding_above);
void build_convolution_backward_weights_bias(
const mkldnn::convolution_backward_weights::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
template <typename OP> template <typename OP>
size_t build_convolution_backward(const ngraph::Node* node, size_t build_convolution_backward(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args, const std::vector<TensorViewWrapper>& args,
...@@ -326,9 +364,13 @@ namespace ngraph ...@@ -326,9 +364,13 @@ namespace ngraph
{ {
// MKLDNN relies on named formats for kernel selection // MKLDNN relies on named formats for kernel selection
if (arg0_desc.data.format == mkldnn_nchw) if (arg0_desc.data.format == mkldnn_nchw)
{
arg0_desc.data.format = mkldnn_oihw; arg0_desc.data.format = mkldnn_oihw;
}
if (arg0_desc.data.format == mkldnn_ncdhw) if (arg0_desc.data.format == mkldnn_ncdhw)
{
arg0_desc.data.format = mkldnn_oidhw; arg0_desc.data.format = mkldnn_oidhw;
}
return build_convolution_backward_data( return build_convolution_backward_data(
arg0_desc, arg0_desc,
...@@ -375,6 +417,102 @@ namespace ngraph ...@@ -375,6 +417,102 @@ namespace ngraph
const ngraph::Shape& padding_below, const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above); const ngraph::Shape& padding_above);
// Builds the mkldnn average-pooling forward descriptor for `node`.
// When `training` is true the descriptor is built with the output md in the
// src position and the input md in the dst position (swapped relative to the
// inference case) — presumably because it serves as the forward hint for the
// avg-pool backprop node; confirm against callers before changing.
template <typename OP>
mkldnn::pooling_forward::desc get_avg_pooling_forward_desc(const ngraph::Node* node,
                                                           bool training)
{
    auto pool = static_cast<const OP*>(node);

    // Copy the ngraph shapes once so the mkldnn::memory::dims constructions
    // below iterate over stable containers.
    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    // Whether padded elements participate in the average.
    auto algo = pool->get_include_padding_in_avg_computation()
                    ? mkldnn::algorithm::pooling_avg_include_padding
                    : mkldnn::algorithm::pooling_avg_exclude_padding;

    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    mkldnn::memory::dims strides(window_strides.begin(), window_strides.end());
    mkldnn::memory::dims kernel(window_shape.begin(), window_shape.end());
    mkldnn::memory::dims pad_l(padding_below.begin(), padding_below.end());
    mkldnn::memory::dims pad_r(padding_above.begin(), padding_above.end());

    if (training)
    {
        return mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_training,
                                             algo,
                                             result_desc,
                                             input_desc,
                                             strides,
                                             kernel,
                                             pad_l,
                                             pad_r,
                                             mkldnn::padding_kind::zero);
    }
    return mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference,
                                         algo,
                                         input_desc,
                                         result_desc,
                                         strides,
                                         kernel,
                                         pad_l,
                                         pad_r,
                                         mkldnn::padding_kind::zero);
}
// Builds the mkldnn max-pooling forward descriptor for `node`.
// In the training case the node's input 1 / output 0 mds are used as
// diff_dst / diff_src respectively — presumably this variant serves as the
// forward hint for the max-pool backprop node; confirm against callers.
template <typename OP>
mkldnn::pooling_forward::desc get_max_pooling_forward_desc(const ngraph::Node* node,
                                                           bool training)
{
    auto pool = static_cast<const OP*>(node);

    // Copy the ngraph shapes once; the dims constructions below reuse them.
    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    mkldnn::memory::dims strides(window_strides.begin(), window_strides.end());
    mkldnn::memory::dims kernel(window_shape.begin(), window_shape.end());
    mkldnn::memory::dims pad_l(padding_below.begin(), padding_below.end());
    mkldnn::memory::dims pad_r(padding_above.begin(), padding_above.end());

    if (training)
    {
        auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
        auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
        return mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_training,
                                             mkldnn::algorithm::pooling_max,
                                             diff_src_desc,
                                             diff_dst_desc,
                                             strides,
                                             kernel,
                                             pad_l,
                                             pad_r,
                                             mkldnn::padding_kind::zero);
    }
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
    return mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference,
                                         mkldnn::algorithm::pooling_max,
                                         input_desc,
                                         result_desc,
                                         strides,
                                         kernel,
                                         pad_l,
                                         pad_r,
                                         mkldnn::padding_kind::zero);
}
void build_pooling_forward(const mkldnn::pooling_forward::desc& pool_desc,
size_t pool_index);
size_t build_pooling_backward(mkldnn::algorithm pooling_algorithm, size_t build_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
const mkldnn::memory::desc& diff_src_desc, const mkldnn::memory::desc& diff_src_desc,
...@@ -383,6 +521,39 @@ namespace ngraph ...@@ -383,6 +521,39 @@ namespace ngraph
const ngraph::Shape& padding_below, const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above); const ngraph::Shape& padding_above);
// Builds the mkldnn average-pooling backward descriptor for `node`.
// Input 0 supplies diff_dst and output 0 supplies diff_src.
template <typename OP>
mkldnn::pooling_backward::desc get_avg_pooling_backward_desc(const ngraph::Node* node)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    // Whether padded elements participate in the average.
    auto algo = pool->get_include_padding_in_avg_computation()
                    ? mkldnn::algorithm::pooling_avg_include_padding
                    : mkldnn::algorithm::pooling_avg_exclude_padding;

    auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    return mkldnn::pooling_backward::desc(
        algo,
        diff_src_desc,
        diff_dst_desc,
        mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
        mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
}
void build_pooling_backward(const mkldnn::pooling_backward::desc& pool_desc,
const mkldnn::pooling_forward::desc& pool_fwd_desc,
size_t pool_index);
size_t build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm, size_t build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& src_desc, const mkldnn::memory::desc& src_desc,
const mkldnn::memory::desc& dst_desc, const mkldnn::memory::desc& dst_desc,
...@@ -391,6 +562,35 @@ namespace ngraph ...@@ -391,6 +562,35 @@ namespace ngraph
const ngraph::Shape& padding_below, const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above); const ngraph::Shape& padding_above);
// Builds the mkldnn max-pooling (with indices) forward descriptor for `node`.
// Always uses forward_training so the workspace holding the max indices is
// produced alongside the pooled output.
template <typename OP>
mkldnn::pooling_forward::desc
    get_max_pooling_with_indices_forward_desc(const ngraph::Node* node)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    return mkldnn::pooling_forward::desc(
        mkldnn::prop_kind::forward_training,
        mkldnn::algorithm::pooling_max,
        input_desc,
        result_desc,
        mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
        mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
}
void build_max_pooling_with_indices_forward(
const mkldnn::pooling_forward::desc& max_pool_desc, size_t max_pool_index);
size_t build_max_pooling_backward(mkldnn::algorithm pooling_algorithm, size_t build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& fprop_src_desc, const mkldnn::memory::desc& fprop_src_desc,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
...@@ -400,6 +600,37 @@ namespace ngraph ...@@ -400,6 +600,37 @@ namespace ngraph
const ngraph::Shape& padding_below, const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above); const ngraph::Shape& padding_above);
// Builds the mkldnn max-pooling backward descriptor for `node`.
// Input 1 supplies diff_dst and output 0 supplies diff_src.
template <typename OP>
mkldnn::pooling_backward::desc get_max_pooling_backward_desc(const ngraph::Node* node)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    return mkldnn::pooling_backward::desc(
        mkldnn::algorithm::pooling_max,
        diff_src_desc,
        diff_dst_desc,
        mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
        mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
}
void build_max_pooling_backward(const mkldnn::pooling_backward::desc& bwd_pool_desc,
const mkldnn::pooling_forward::desc& fwd_pool_desc,
const mkldnn::memory::desc& fprop_src_desc,
size_t fwd_pool_index,
size_t bwd_pool_index);
size_t build_max_pooling_with_indices_backward( size_t build_max_pooling_with_indices_backward(
mkldnn::algorithm pooling_algorithm, mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc, const mkldnn::memory::desc& diff_dst_desc,
...@@ -409,9 +640,18 @@ namespace ngraph ...@@ -409,9 +640,18 @@ namespace ngraph
const ngraph::Shape& padding_below, const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above); const ngraph::Shape& padding_above);
void build_max_pooling_with_indices_backward(
const mkldnn::pooling_backward::desc& bwd_pool_desc,
const mkldnn::pooling_forward::desc& fwd_pool_desc,
size_t max_pool_index);
size_t build_reorder(const mkldnn::memory::desc& input_desc, size_t build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
void build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
size_t reorder_index);
size_t build_lrn_forward(const mkldnn::memory::desc& input_desc, size_t build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha, float alpha,
...@@ -419,20 +659,47 @@ namespace ngraph ...@@ -419,20 +659,47 @@ namespace ngraph
float bias, float bias,
int nsize); int nsize);
mkldnn::lrn_forward::desc get_lrn_forward_desc(const ngraph::Node* node);
void build_lrn_forward(const mkldnn::lrn_forward::desc& lrn_desc, size_t lrn_index);
size_t build_relu_forward(const mkldnn::memory::desc& input_desc, size_t build_relu_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_forward::desc get_relu_forward_desc(const ngraph::Node* node);
void build_relu_forward(const mkldnn::eltwise_forward::desc& relu_desc,
size_t relu_index);
size_t build_relu_backward(const mkldnn::memory::desc& input_desc, size_t build_relu_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_backward::desc get_relu_backward_desc(const ngraph::Node* node);
void build_relu_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
const mkldnn::eltwise_forward::desc& fwd_desc,
size_t relu_index);
size_t build_sigmoid_forward(const mkldnn::memory::desc& input_desc, size_t build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_forward::desc get_sigmoid_forward_desc(const ngraph::Node* node,
bool backward_op);
void build_sigmoid_forward(const mkldnn::eltwise_forward::desc& sigmoid_desc,
size_t sigmoid_index);
size_t build_sigmoid_backward(const mkldnn::memory::desc& input_desc, size_t build_sigmoid_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc, const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_backward::desc get_sigmoid_backward_desc(const ngraph::Node* node);
void build_sigmoid_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
const mkldnn::eltwise_forward::desc& fwd_desc,
size_t sigmoid_index);
size_t build_elementwise_add( size_t build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc, const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc, const mkldnn::memory::desc& input1_data_desc,
...@@ -440,6 +707,11 @@ namespace ngraph ...@@ -440,6 +707,11 @@ namespace ngraph
const std::vector<float>& scale_vector, const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& input_pd); const std::vector<mkldnn::memory::primitive_desc>& input_pd);
mkldnn::sum::primitive_desc get_elementwise_add_desc(const ngraph::Node* node);
void build_elementwise_add(const mkldnn::sum::primitive_desc& sum_pd,
size_t add_index);
size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc, size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc, const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
...@@ -450,6 +722,41 @@ namespace ngraph ...@@ -450,6 +722,41 @@ namespace ngraph
bool bn_training_flag, bool bn_training_flag,
const mkldnn::post_ops& pops = mkldnn::post_ops()); const mkldnn::post_ops& pops = mkldnn::post_ops());
// Builds the mkldnn batch-normalization forward descriptor for `node`.
// Both variants read the data tensor's md from input 2 and use
// forward_training; they differ only in the flags: the 3-arg training form
// computes the batch statistics itself, while the other form additionally
// sets use_global_stats so pre-computed mean/variance are consumed.
template <typename OP>
mkldnn::batch_normalization_forward::desc
    get_batchnorm_forward_desc(const ngraph::Node* node, bool training_with_3args)
{
    const OP* batchnorm = static_cast<const OP*>(node);
    auto eps = batchnorm->get_eps_value();
    auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);

    auto flags = training_with_3args
                     ? mkldnn::batch_normalization_flag::use_scale_shift
                     : (mkldnn::batch_normalization_flag::use_scale_shift |
                        mkldnn::batch_normalization_flag::use_global_stats);

    return mkldnn::batch_normalization_forward::desc(
        mkldnn::prop_kind::forward_training, data_desc, eps, flags);
}
void build_batchnorm_forward(
const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
const mkldnn::memory::desc& weights_desc,
bool bn_training_flag,
size_t batchnorm_index,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_batchnorm_backward(const mkldnn::memory::desc& weights_desc, size_t build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc, const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc, const mkldnn::memory::desc& mean_desc,
...@@ -459,6 +766,15 @@ namespace ngraph ...@@ -459,6 +766,15 @@ namespace ngraph
const mkldnn::memory::desc& dweights_desc, const mkldnn::memory::desc& dweights_desc,
const double eps); const double eps);
mkldnn::batch_normalization_backward::desc
get_batchnorm_backward_desc(const ngraph::Node* node);
void build_batchnorm_backward(
const mkldnn::batch_normalization_backward::desc& batchnorm_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& dweights_desc,
size_t batchnorm_index);
template <typename OP> template <typename OP>
size_t build_rnn(const ngraph::Node* node, size_t build_rnn(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args, const std::vector<TensorViewWrapper>& args,
...@@ -571,27 +887,57 @@ namespace ngraph ...@@ -571,27 +887,57 @@ namespace ngraph
const mkldnn::rnn_direction& rnn_direction, const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm); const mkldnn::algorithm& rnn_algorithm);
void build_rnn_forward(const mkldnn::rnn_forward::desc& desc, size_t rnn_idx);
size_t build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc, size_t build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const size_t concat_dim); const size_t concat_dim);
mkldnn::concat::primitive_desc get_concat_desc(const ngraph::Node* node,
size_t nargs);
void build_concat(const mkldnn::concat::primitive_desc& concat_pd,
const std::vector<mkldnn::memory::desc>& inputs_data_desc,
size_t concat_index);
size_t build_slice(const mkldnn::memory::desc& input_desc, size_t build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds, const ngraph::Coordinate& lower_bounds,
const ngraph::Shape& result_shape); const ngraph::Shape& result_shape);
void build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds,
const ngraph::Shape& result_shape,
size_t slice_index);
size_t build_softmax_forward(const mkldnn::memory::desc& input_desc, size_t build_softmax_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
int softmax_axis); int softmax_axis);
mkldnn::softmax_forward::desc get_softmax_forward_desc(const ngraph::Node* node);
void build_softmax_forward(const mkldnn::softmax_forward::desc& sigmoid_desc,
size_t softmax_index);
size_t build_leaky_relu(const mkldnn::memory::desc& input_desc, size_t build_leaky_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha); float alpha);
mkldnn::eltwise_forward::desc get_leaky_relu_desc(const ngraph::Node* node);
void build_leaky_relu(const mkldnn::eltwise_forward::desc& leaky_relu_desc,
size_t leaky_relu_index);
size_t build_bounded_relu(const mkldnn::memory::desc& input_desc, size_t build_bounded_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha); float alpha);
mkldnn::eltwise_forward::desc get_bounded_relu_desc(const ngraph::Node* node);
void build_bounded_relu(const mkldnn::eltwise_forward::desc& bounded_relu_desc,
size_t bounded_relu_index);
size_t build_quantized_max_pool(const ngraph::Node* node); size_t build_quantized_max_pool(const ngraph::Node* node);
size_t build_quantized_avg_pool(const ngraph::Node* node); size_t build_quantized_avg_pool(const ngraph::Node* node);
...@@ -603,23 +949,33 @@ namespace ngraph ...@@ -603,23 +949,33 @@ namespace ngraph
size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc, size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales); const std::vector<float>& scales);
void build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales,
size_t quantize_index,
const int mask = 0);
template <typename OP> template <typename OP>
size_t get_scale_index() size_t get_scale_index()
{ {
size_t index = 0;
if (std::is_same<OP, ngraph::op::QuantizedConvolution>() || if (std::is_same<OP, ngraph::op::QuantizedConvolution>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>()) std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{ {
return 2; index = 2;
} }
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{ {
return 3; index = 3;
} }
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() || else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>()) std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{ {
return 4; index = 4;
} }
NGRAPH_ASSERT(index != 0);
return index;
} }
template <typename OP, typename T> template <typename OP, typename T>
...@@ -640,7 +996,8 @@ namespace ngraph ...@@ -640,7 +996,8 @@ namespace ngraph
template <typename OP, template <typename OP,
typename std::enable_if< typename std::enable_if<
(std::is_same<OP, ngraph::op::Convolution>::value || (std::is_same<OP, ngraph::op::Convolution>::value ||
std::is_same<OP, ngraph::op::QuantizedConvolution>::value), std::is_same<OP, ngraph::op::QuantizedConvolution>::value ||
std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr> std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node) bool has_relu(const ngraph::Node* node)
{ {
...@@ -650,7 +1007,8 @@ namespace ngraph ...@@ -650,7 +1007,8 @@ namespace ngraph
template <typename OP, template <typename OP,
typename std::enable_if< typename std::enable_if<
(!std::is_same<OP, ngraph::op::Convolution>::value && (!std::is_same<OP, ngraph::op::Convolution>::value &&
!std::is_same<OP, ngraph::op::QuantizedConvolution>::value), !std::is_same<OP, ngraph::op::QuantizedConvolution>::value &&
!std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr> std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node) bool has_relu(const ngraph::Node* node)
{ {
...@@ -662,9 +1020,11 @@ namespace ngraph ...@@ -662,9 +1020,11 @@ namespace ngraph
{ {
if (std::is_same<OP, ngraph::op::ConvolutionBias>() || if (std::is_same<OP, ngraph::op::ConvolutionBias>() ||
std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() || std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionBiasBackpropFiltersBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() || std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() || std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>()) std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>() ||
std::is_same<OP, ngraph::op::GroupConvolutionBias>())
{ {
return true; return true;
} }
...@@ -691,11 +1051,114 @@ namespace ngraph ...@@ -691,11 +1051,114 @@ namespace ngraph
} }
} }
// Builds an mkldnn rnn_forward (training) descriptor from an ngraph RNN-style
// node. OP must expose the getters used below (sequence length, direction,
// fused layers, feature sizes, batch, gates/states per cell, rnn type,
// timesteps). `args` supplies element types for the five inputs
// (src_layer, src_iter, weights_layer, weights_iter, bias) and `out` for the
// two outputs (dst_layer, dst_iter).
template <typename OP>
mkldnn::rnn_forward::desc
    get_rnn_forward_desc(const ngraph::Node* node,
                         const std::vector<TensorViewWrapper>& args,
                         const std::vector<TensorViewWrapper>& out)
{
    auto rnn_node = static_cast<const OP*>(node);
    // Casts to unsigned long match the Shape element type used below.
    auto src_sequence_length_max =
        static_cast<unsigned long>(rnn_node->get_src_sequence_length());
    auto direction = static_cast<unsigned long>(rnn_node->get_direction());
    auto num_fused_layers =
        static_cast<unsigned long>(rnn_node->get_num_fused_layers());
    auto feature_size =
        static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
    auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
    auto rnn_cell_n_gates =
        static_cast<unsigned long>(rnn_node->get_gates_per_cell());
    auto rnn_cell_n_states =
        static_cast<unsigned long>(rnn_node->get_num_cell_states());

    // Map the ngraph rnn type onto the mkldnn cell algorithm.
    auto get_mkldnn_rnn_cell_type = [&]() {
        switch (rnn_node->get_rnn_type())
        {
            case rnn_utils::rnntype::vanilla_rnn: return mkldnn::algorithm::vanilla_rnn;
            case rnn_utils::rnntype::vanilla_gru: return mkldnn::algorithm::vanilla_gru;
            case rnn_utils::rnntype::vanilla_lstm:
                return mkldnn::algorithm::vanilla_lstm;
            default: throw ngraph_error("unsupported mkldnn rnn algorithm");
        }
    };

    // Only uni-directional (left-to-right) and bi-directional-concat are
    // supported here.
    auto get_mkldnn_rnn_direction = [&]() {
        switch (direction)
        {
            case 1: return mkldnn::rnn_direction::unidirectional_left2right;
            case 2: return mkldnn::rnn_direction::bidirectional_concat;
            default: throw ngraph_error("unsupported mkldnn rnn direction");
        }
    };

    // Sanity-check that the output feature sizes are consistent with the
    // node's direction / feature-size attributes before building descriptors.
    if (out[0].get_shape().size() == 2 &&
        (out[0].get_shape()[1] != direction * feature_size))
    {
        throw ngraph_error(
            "input slc{ht} feature size is not equal to output dlc{ht} feature "
            "size ");
    }

    if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
        rnn_node->get_num_timesteps() != 1)
    {
        throw ngraph_error(
            "input sic{ht_1|ct_1} feature size is not equal to output "
            "dlc{ht_1|ct_1} "
            "feature size ");
    }

    // Logical tensor shapes in mkldnn's expected dimension order
    // (see the memory::format used with each descriptor below).
    Shape src_layer_tz{
        src_sequence_length_max,
        batch,
        static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
    Shape src_iter_tz{
        num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
    Shape wei_layer_tz{
        num_fused_layers,
        direction,
        static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
        rnn_cell_n_gates,
        feature_size};
    Shape wei_iter_tz{
        num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
    Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
    Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
    Shape dst_iter_tz{
        num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};

    // We create the memory descriptors used by the user
    auto src_layer_desc = build_memory_descriptor(
        src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
    auto src_iter_desc = build_memory_descriptor(
        src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
    auto weights_layer_desc = build_memory_descriptor(
        wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
    auto weights_iter_desc = build_memory_descriptor(
        wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
    auto bias_desc = build_memory_descriptor(
        bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
    auto dst_layer_desc = build_memory_descriptor(
        dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
    auto dst_iter_desc = build_memory_descriptor(
        dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);

    mkldnn::rnn_cell::desc rnn_cell_desc(get_mkldnn_rnn_cell_type());
    return mkldnn::rnn_forward::desc(mkldnn::prop_kind::forward_training,
                                     rnn_cell_desc,
                                     get_mkldnn_rnn_direction(),
                                     src_layer_desc,
                                     src_iter_desc,
                                     weights_layer_desc,
                                     weights_iter_desc,
                                     bias_desc,
                                     dst_layer_desc,
                                     dst_iter_desc);
}
template <typename OP> template <typename OP>
mkldnn::convolution_forward::desc mkldnn::convolution_forward::desc
get_convolution_forward_desc(const ngraph::Node* node, get_convolution_forward_desc(const ngraph::Node* node)
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{ {
auto convolution = static_cast<const OP*>(node); auto convolution = static_cast<const OP*>(node);
// For dilation, MKLDNN wants to know how many elements to insert between, not how far // For dilation, MKLDNN wants to know how many elements to insert between, not how far
...@@ -789,10 +1252,10 @@ namespace ngraph ...@@ -789,10 +1252,10 @@ namespace ngraph
size_t convolution_forward_init(bool with_bias = false); size_t convolution_forward_init(bool with_bias = false);
template <bool with_bias> template <bool with_bias>
void convolution_forward(const mkldnn::convolution_forward::desc& desc, void build_convolution_forward(const mkldnn::convolution_forward::desc& desc,
const mkldnn::primitive_attr& attr, const mkldnn::primitive_attr& attr,
const mkldnn::engine& engine, const mkldnn::engine& engine,
size_t& conv_idx) size_t conv_idx)
{ {
size_t input_idx, weights_idx, results_idx, bias_idx; size_t input_idx, weights_idx, results_idx, bias_idx;
input_idx = m_primitive_deps[conv_idx][0]; input_idx = m_primitive_deps[conv_idx][0];
...@@ -835,6 +1298,174 @@ namespace ngraph ...@@ -835,6 +1298,174 @@ namespace ngraph
m_mkldnn_primitives[conv_idx] = prim; m_mkldnn_primitives[conv_idx] = prim;
} }
template <typename OP>
mkldnn::convolution_backward_data::desc
get_convolution_backward_data_desc(const ngraph::Node* node)
{
auto convolution = static_cast<const OP*>(node);
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
return mkldnn::convolution_backward_data::desc(
mkldnn::algorithm::convolution_direct,
result_desc,
weights_desc,
delta_desc,
MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
MKLDNN_DIMS(window_dilation_strides_adjusted),
MKLDNN_DIMS(convolution->get_padding_below_forward()),
MKLDNN_DIMS(convolution->get_padding_above_forward()),
mkldnn::padding_kind::zero);
}
template <typename OP>
mkldnn::convolution_backward_weights::desc
get_convolution_backward_weights_desc(const ngraph::Node* node)
{
auto convolution = static_cast<const OP*>(node);
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
if (has_bias<OP>())
{
auto out_bias_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
return mkldnn::convolution_backward_weights::desc(
mkldnn::algorithm::convolution_direct,
in_data_desc,
out_weights_delta_desc,
out_bias_delta_desc,
in_delta_desc,
MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
MKLDNN_DIMS(window_dilation_strides_adjusted),
MKLDNN_DIMS(convolution->get_padding_below_forward()),
MKLDNN_DIMS(convolution->get_padding_above_forward()),
mkldnn::padding_kind::zero);
}
else
{
return mkldnn::convolution_backward_weights::desc(
mkldnn::algorithm::convolution_direct,
in_data_desc,
out_weights_delta_desc,
in_delta_desc,
MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
MKLDNN_DIMS(window_dilation_strides_adjusted),
MKLDNN_DIMS(convolution->get_padding_below_forward()),
MKLDNN_DIMS(convolution->get_padding_above_forward()),
mkldnn::padding_kind::zero);
}
}
                template <typename OP>
                mkldnn::convolution_forward::desc
                    get_convolution_forward_desc_for_backward_op(const ngraph::Node* node)
                {
                    // Builds a *forward* convolution descriptor from a backward op's
                    // attributes. MKLDNN's backward primitive descriptors require a
                    // forward primitive descriptor as a hint, which is presumably what
                    // this is used for — TODO(review) confirm against the caller.
                    // Dispatches on OP: BackpropData, BackpropFilters, or (final else)
                    // the bias-producing filters-backprop variant.
                    auto convolution = static_cast<const OP*>(node);
                    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
                    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
                    Strides window_dilation_strides_adjusted;
                    for (size_t s : convolution->get_window_dilation_strides_forward())
                    {
                        window_dilation_strides_adjusted.push_back(s - 1);
                    }
                    // NOTE: these are plain runtime ifs (not if constexpr), so every
                    // branch is instantiated for every OP; all OP types must provide
                    // the get_*_forward() accessors used below.
                    if (std::is_same<OP, ngraph::op::ConvolutionBackpropData>())
                    {
                        // Input 0 is the weights tensor here.
                        auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                        // MKLDNN relies on named formats for kernel selection: remap
                        // generic data layouts to the corresponding weight layouts.
                        if (weights_desc.data.format == mkldnn_nchw)
                        {
                            weights_desc.data.format = mkldnn_oihw;
                        }
                        if (weights_desc.data.format == mkldnn_ncdhw)
                        {
                            weights_desc.data.format = mkldnn_oidhw;
                        }
                        auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
                        auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
                        // Forward-desc roles: src = backward result (diff_src shape),
                        // weights = weights, dst = delta (diff_dst shape).
                        return mkldnn::convolution_forward::desc(
                            mkldnn::prop_kind::forward,
                            mkldnn::algorithm::convolution_direct,
                            result_desc,
                            weights_desc,
                            delta_desc,
                            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
                            MKLDNN_DIMS(window_dilation_strides_adjusted),
                            MKLDNN_DIMS(convolution->get_padding_below_forward()),
                            MKLDNN_DIMS(convolution->get_padding_above_forward()),
                            mkldnn::padding_kind::zero);
                    }
                    else if (std::is_same<OP, ngraph::op::ConvolutionBackpropFilters>())
                    {
                        auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                        auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
                        auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
                        // Forward-desc roles: src = data, weights = weights gradient
                        // (output 0), dst = delta.
                        return mkldnn::convolution_forward::desc(
                            mkldnn::prop_kind::forward,
                            mkldnn::algorithm::convolution_direct,
                            in_data_desc,
                            out_weights_delta_desc,
                            in_delta_desc,
                            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
                            MKLDNN_DIMS(window_dilation_strides_adjusted),
                            MKLDNN_DIMS(convolution->get_padding_below_forward()),
                            MKLDNN_DIMS(convolution->get_padding_above_forward()),
                            mkldnn::padding_kind::zero);
                    }
                    else
                    {
                        // Fallback: a filters-backprop op that also produces a bias
                        // gradient as output 1 — uses the with-bias forward overload.
                        auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                        auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
                        auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
                        auto out_bias_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
                        return mkldnn::convolution_forward::desc(
                            mkldnn::prop_kind::forward,
                            mkldnn::algorithm::convolution_direct,
                            in_data_desc,
                            out_weights_delta_desc,
                            out_bias_delta_desc,
                            in_delta_desc,
                            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
                            MKLDNN_DIMS(window_dilation_strides_adjusted),
                            MKLDNN_DIMS(convolution->get_padding_below_forward()),
                            MKLDNN_DIMS(convolution->get_padding_above_forward()),
                            mkldnn::padding_kind::zero);
                    }
                }
private: private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives; std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::stream> m_mkldnn_streams; std::vector<mkldnn::stream> m_mkldnn_streams;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment