Commit eda11da7 authored by Amy Zhuang, committed by Adam Procter

Refactor to create MKLDNN primitives on the first iteration: (#2363)

* Refactor to create MKLDNN primitives on the first iteration:
  add, avg_pool, batch_norm, bounded_relu, concat, convert_layout,
  leaky_relu, lrn, max_pool, quantized_avg_pool, quantized_max_pool,
  relu, sigmoid, slice, softmax.

* Refactor to create MKLDNN primitives on the first iteration:
  pooling backward, convolution.

* Refactor to create MKLDNN primitives on the first iteration:
  convolution backward, rnn, lstm, quantization, dequantization.

* Delete one duplicate declaration.

* Create and pass mkldnn descriptors/primitive-descriptors for ops.

* Create and pass mkldnn descriptors for convolution backward ops.

* Remove one unused variable.

* Remove unused variables.

* Remove unused variables.

* Address PR feedback.

* Fix a bug.

* Add one parameter to build_quantize_reorder.

* Address PR feedback.

* Fix bi-rnn issue.
parent c571b7a7
......@@ -38,28 +38,22 @@ namespace ngraph
{
auto& functors = external_function->get_functors();
vector<float> scale_vector(2, 1);
vector<mkldnn::memory::primitive_desc> inputs_pd;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, runtime::cpu::executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, runtime::cpu::executor::global_cpu_engine));
size_t add_index = mkldnn_emitter->build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
auto sum_pd = mkldnn_emitter->get_elementwise_add_desc(node);
// Add needs 4 primitives: input0, input1, result, and sum.
size_t add_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto functor = [&, add_index](CPURuntimeContext* ctx,
auto functor = [&, sum_pd, add_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_elementwise_add(sum_pd, add_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
......@@ -52,24 +52,19 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t avg_pool_index = mkldnn_emitter->build_pooling_forward(
(include_padding_in_avg_computation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
window_movement_strides,
window_shape,
padding_below,
padding_above);
auto avg_pool_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPool>(node,
false);
// AvgPool needs 3 primitives: input, result, and pooling_forward.
size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx,
auto functor = [&, avg_pool_desc, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(avg_pool_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
......@@ -130,23 +125,23 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_src_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t avg_pool_index = mkldnn_emitter->build_pooling_backward(
(apb->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_dst_desc,
diff_src_desc,
apb->get_window_movement_strides(),
apb->get_window_shape(),
apb->get_padding_below(),
apb->get_padding_above());
auto avg_pool_fwd_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPoolBackprop>(
node, true);
auto avg_pool_desc =
mkldnn_emitter->get_avg_pooling_backward_desc<ngraph::op::AvgPoolBackprop>(
node);
// AvgPoolBackprop needs 3 primitives: input, result, and pooling_backward.
size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, avg_pool_desc, avg_pool_fwd_desc, avg_pool_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_backward(
avg_pool_desc, avg_pool_fwd_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
......
......@@ -48,8 +48,6 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
const OP* batchnorm = static_cast<const OP*>(node);
// Kill clang diagnostics bug
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-braces"
......@@ -80,28 +78,32 @@ namespace ngraph
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, true);
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mean_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
auto variance_desc = mkldnn_utils::get_output_mkldnn_md(node, 2);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&,
batchnorm_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
false,
training,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
......@@ -122,29 +124,32 @@ namespace ngraph
auto& arg4_tensor = external_function->get_tensor_data(args[4].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, false);
auto weights_shape = Shape{2, args[0].get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
true,
training,
ops);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
auto functor = [&,
batchnorm_desc,
weights_desc,
training,
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
......@@ -295,9 +300,6 @@ namespace ngraph
template <>
void Builder::BUILDER_DECL(ngraph::op::BatchNormTrainingBackprop)
{
const ngraph::op::BatchNormTrainingBackprop* batchnorm =
static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
......@@ -326,34 +328,31 @@ namespace ngraph
std::default_delete<uint8_t[]>());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc = mkldnn_emitter->get_batchnorm_backward_desc(node);
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
auto dinput_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto dweights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_backward(weights_desc,
input_desc,
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
// batchnorm backward needs 8 primitives: weights, input, mean, variance,
// delta, dinput, dweights, and batch_normalization_backward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(8);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&,
batchnorm_desc,
weights_desc,
dweights_desc,
batchnorm_index,
stacked_weights,
stacked_dweights,
weight_sizes](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_backward(
batchnorm_desc, weights_desc, dweights_desc, batchnorm_index);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy(stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
......
......@@ -43,13 +43,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto bounded_relu_index =
mkldnn_emitter->build_bounded_relu(input_desc, result_desc, alpha);
auto bounded_relu_desc = mkldnn_emitter->get_bounded_relu_desc(node);
// BoundedRelu needs 3 primitives: input, result, and eltwise_forward.
auto bounded_relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index);
auto functor = [&, bounded_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, bounded_relu_desc, bounded_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_bounded_relu(bounded_relu_desc,
bounded_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index);
......
......@@ -92,22 +92,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto concat_pd = mkldnn_emitter->get_concat_desc(node, nargs);
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++)
for (size_t i = 0; i < nargs; i++)
{
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t concat_dim =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
auto concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
// Concat needs number of inputs plus 2 primitives; those two are for result and concat.
auto concat_index = mkldnn_emitter->reserve_primitive_space(nargs + 2);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
auto functor = [&, arg_tensors, nargs, concat_index](
auto functor =
[&, concat_pd, inputs_data_desc, arg_tensors, nargs, concat_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_concat(
concat_pd, inputs_data_desc, concat_index);
}
for (size_t i = 0; i < nargs; i++)
{
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[i], arg_tensors[i]);
......
......@@ -81,11 +81,15 @@ namespace ngraph
mkldnn::memory::format::goihw);
}
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc);
// ConvertLayout needs 3 primitives: input, result, and reorder.
size_t reorder_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
auto functor = [&, reorder_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, input_desc, result_desc, reorder_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_reorder(input_desc, result_desc, reorder_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index);
......
......@@ -52,12 +52,20 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::Convolution>(node, args, out);
auto conv_desc =
mkldnn_emitter->get_convolution_forward_desc<ngraph::op::Convolution>(node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::Convolution>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......@@ -124,13 +132,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionRelu>(
node, args, out);
auto conv_desc =
mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionRelu>(
node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionRelu>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......@@ -157,13 +174,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBias>(
node, args, out);
auto conv_desc =
mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionBias>(
node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionBias>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
......@@ -193,13 +219,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::ConvolutionBiasAdd>(
node, args, out);
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::ConvolutionBiasAdd>(node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::ConvolutionBiasAdd>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index, arg3_size](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index, arg3_size](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
if (out_tensor != arg3_tensor)
{
memcpy(static_cast<char*>(out_tensor),
......@@ -234,12 +269,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution<ngraph::op::ConvolutionAdd>(
node, args, out);
auto conv_desc =
mkldnn_emitter->get_convolution_forward_desc<ngraph::op::ConvolutionAdd>(
node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::ConvolutionAdd>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(false);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index, arg2_size](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index, arg2_size](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
if (out_tensor != arg2_tensor)
{
memcpy(static_cast<char*>(out_tensor),
......@@ -277,14 +322,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter
->build_convolution_backward<ngraph::op::ConvolutionBackpropData>(
node, args, out);
auto bwd_desc = mkldnn_emitter->get_convolution_backward_data_desc<
ngraph::op::ConvolutionBackpropData>(node);
auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
ngraph::op::ConvolutionBackpropData>(node);
// ConvolutionBackpropData needs 4 primitives: weights, delta, result,
// and convolution_backward.
auto conv_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_data(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......@@ -359,14 +412,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter
->build_convolution_backward<ngraph::op::ConvolutionBackpropFilters>(
node, args, out);
auto bwd_desc = mkldnn_emitter->get_convolution_backward_weights_desc<
ngraph::op::ConvolutionBackpropFilters>(node);
auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
ngraph::op::ConvolutionBackpropFilters>(node);
// ConvolutionBackpropFilter needs 4 primitives: input, delta, weights_delta,
// and convolution_backward_weights.
auto conv_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_weights(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......@@ -436,12 +497,22 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index = mkldnn_emitter->build_convolution_backward<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node, args, out);
auto bwd_desc = mkldnn_emitter->get_convolution_backward_weights_desc<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node);
auto fwd_desc = mkldnn_emitter->get_convolution_forward_desc_for_backward_op<
ngraph::op::ConvolutionBiasBackpropFiltersBias>(node);
// ConvolutionBiasBackpropFilter needs 5 primitives: input, delta, weights_delta,
// bias_delta, and convolution_backward_weights.
auto conv_index = mkldnn_emitter->reserve_primitive_space(5);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
auto functor = [&, bwd_desc, fwd_desc, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_backward_weights_bias(
bwd_desc, fwd_desc, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
......@@ -466,43 +537,25 @@ namespace ngraph
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto convolution = static_cast<const ngraph::op::GroupConvolution*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above);
auto conv_desc =
mkldnn_emitter->get_convolution_forward_desc<ngraph::op::GroupConvolution>(
node);
auto conv_attr =
mkldnn_emitter->get_convolution_forward_attr<ngraph::op::GroupConvolution>(
node);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, conv_desc, conv_attr, conv_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
// group convolution
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......@@ -528,58 +581,25 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto convolution = static_cast<const ngraph::op::GroupConvolutionBias*>(node);
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto arg2_shape = args[2].get_shape();
auto result_shape = out[0].get_shape();
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::GroupConvolutionBias>(node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::GroupConvolutionBias>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
auto filter_strides = convolution->get_window_movement_strides();
const float ops_scale = 1.f;
const float ops_alpha = -0.f; // relu negative slope
const float ops_beta = 0.f;
mkldnn::post_ops ops;
if (convolution->with_relu())
auto functor = [&, conv_desc, conv_attr, conv_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
ops.append_eltwise(
ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
size_t conv_index =
mkldnn_emitter->build_convolution_forward(input_data_desc,
weights_desc,
bias_desc,
result_desc,
filter_strides,
window_dilation_strides_adjusted,
padding_below,
padding_above,
ops);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
......
......@@ -43,13 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto leaky_relu_index =
mkldnn_emitter->build_leaky_relu(input_desc, result_desc, alpha);
auto leaky_relu_desc = mkldnn_emitter->get_leaky_relu_desc(node);
// LeakyRelu needs 3 primitives: input, result, and eltwise_forward.
auto leaky_relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index);
auto functor = [&, leaky_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, leaky_relu_desc, leaky_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_leaky_relu(leaky_relu_desc, leaky_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index);
......
......@@ -43,19 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto lrn_index =
mkldnn_emitter->build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
auto lrn_desc = mkldnn_emitter->get_lrn_forward_desc(node);
// LRN needs 3 primitives: input, result, and lrn_forward.
auto lrn_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
functor = [&, lrn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
functor = [&, lrn_desc, lrn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_lrn_forward(lrn_desc, lrn_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index);
......
......@@ -54,10 +54,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_index = mkldnn_emitter->build_rnn<ngraph::op::Lstm>(node, args, out);
auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto lstm_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
auto functor = [&, lstm_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
auto functor = [&, lstm_desc, lstm_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(lstm_desc, lstm_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
......@@ -51,22 +51,19 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index =
mkldnn_emitter->build_pooling_forward(mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
window_movement_strides,
window_shape,
padding_below,
padding_above);
auto max_pool_desc =
mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::MaxPool>(node,
false);
// MaxPool needs 3 primitives: input, result, and pooling_forward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx,
auto functor = [&, max_pool_desc, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(max_pool_desc, max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
......@@ -124,40 +121,62 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fwd_pool_desc =
mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::MaxPoolBackprop>(
node, true);
auto bwd_pool_desc =
mkldnn_emitter->get_max_pooling_backward_desc<ngraph::op::MaxPoolBackprop>(
node);
auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_backward(
mkldnn::algorithm::pooling_max,
fprop_src_desc,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
auto& fdeps = mkldnn_emitter->get_primitive_deps(max_pool_index - 1);
auto functor_fprop = [&, max_pool_index](CPURuntimeContext* ctx,
// MaxPoolBackprop forward needs 4 primitives: fprop_src, diff_src, workspace,
// and pooling_forward.
// It needs a new workspace.
size_t fwd_pool_index =
mkldnn_emitter->reserve_primitive_space(4, true /* new workspace */);
auto& fdeps = mkldnn_emitter->get_primitive_deps(fwd_pool_index);
auto functor_fprop = [&, fwd_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[0], arg_fwd_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, fdeps[1], out_tensor);
cpu::mkldnn_utils::set_memory_ptr(
ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index - 1);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, fwd_pool_index);
};
auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor_bprop = [&, max_pool_index](CPURuntimeContext* ctx,
// MaxPoolBackprop backward needs 4 primitives: diff_dst, workspace, diff_src,
// and pooling_backward.
// It needs a new workspace.
size_t bwd_pool_index =
mkldnn_emitter->reserve_primitive_space(4, true /* new workspace */);
auto& bdeps = mkldnn_emitter->get_primitive_deps(bwd_pool_index);
auto functor_bprop = [&, bwd_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(
ctx, bdeps[1], ctx->mkldnn_workspaces[bdeps[3]]);
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bwd_pool_index);
};
auto functor = [&, functor_fprop, functor_bprop](CPURuntimeContext* ctx,
auto functor = [&,
bwd_pool_desc,
fwd_pool_desc,
fprop_src_desc,
fwd_pool_index,
bwd_pool_index,
functor_fprop,
functor_bprop](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_backward(bwd_pool_desc,
fwd_pool_desc,
fprop_src_desc,
fwd_pool_index,
bwd_pool_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
functor_fprop(ctx, ectx);
functor_bprop(ctx, ectx);
};
......@@ -202,8 +221,6 @@ namespace ngraph
throw ngraph_error("MaxPoolWithIndices isn't supported");
}
auto max_pool = static_cast<const ngraph::op::MaxPoolWithIndices*>(node);
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
......@@ -211,22 +228,22 @@ namespace ngraph
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_forward(
mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
max_pool->get_window_movement_strides(),
max_pool->get_window_shape(),
max_pool->get_padding_below(),
max_pool->get_padding_above());
auto max_pool_desc =
mkldnn_emitter
->get_max_pooling_with_indices_forward_desc<ngraph::op::MaxPoolWithIndices>(
node);
// MaxPoolWithIndices needs 4 primitives: src, dst, workspace, and pooling_forward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx,
auto functor = [&, max_pool_desc, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_forward(max_pool_desc,
max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out1_tensor);
......@@ -249,25 +266,27 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto mpb = static_cast<const ngraph::op::MaxPoolWithIndicesBackprop*>(node);
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 1);
auto diff_src_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t max_pool_index = mkldnn_emitter->build_max_pooling_with_indices_backward(
mkldnn::algorithm::pooling_max,
diff_dst_desc,
diff_src_desc,
mpb->get_window_movement_strides(),
mpb->get_window_shape(),
mpb->get_padding_below(),
mpb->get_padding_above());
auto fwd_pool_desc =
mkldnn_emitter
->get_max_pooling_forward_desc<ngraph::op::MaxPoolWithIndicesBackprop>(
node, true);
auto bwd_pool_desc =
mkldnn_emitter
->get_max_pooling_backward_desc<ngraph::op::MaxPoolWithIndicesBackprop>(
node);
                // MaxPoolWithIndicesBackprop needs 4 primitives: diff_dst, fprop_workspace,
                // diff_src, and pooling_backward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor = [&, max_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, bwd_pool_desc, fwd_pool_desc, max_pool_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_backward(
bwd_pool_desc, fwd_pool_desc, max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
......@@ -54,35 +54,26 @@ namespace ngraph
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
dequantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr)
{
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape());
size_t dequantize_index =
mkldnn_emitter->build_dequantization(node, input_desc, result_desc);
// Dequantize needs 3 primitives: input, result, and reorder.
size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, input_desc, result_desc, scales_size, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales dont change for the duration of the graph
if (ctx->first_iteration)
{
mkldnn::primitive_attr attr;
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size);
attr.set_output_scales(0, dyn_scales);
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[dequantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, dyn_scales, dequantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
......@@ -92,11 +83,19 @@ namespace ngraph
}
else
{
size_t dequantize_index =
mkldnn_emitter->build_dequantization(node, input_desc, result_desc);
std::vector<float> scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(scale[0]);
size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, dequantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
functor = [&, input_desc, result_desc, scales, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, dequantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
......@@ -243,25 +242,21 @@ namespace ngraph
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr)
{
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape());
// Dummy value while we wait for the actual values that are provided during
// execution
scales.push_back(1.0f);
size_t quantize_index =
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
// Quantize needs 3 primitives: input, result, and reorder.
size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, input_desc, result_desc, scales_size, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales dont change for the duration of the graph
if (ctx->first_iteration)
{
mkldnn::primitive_attr attr;
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size);
......@@ -271,16 +266,8 @@ namespace ngraph
}
// quantize across first dim (mask=2^0) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 1;
attr.set_output_scales(mask, dyn_scales);
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[quantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, dyn_scales, quantize_index, mask);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
......@@ -291,12 +278,18 @@ namespace ngraph
else
{
auto scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(1.0 / scale[0]);
size_t quantize_index =
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, quantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [&, input_desc, result_desc, scales, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, quantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
......
......@@ -35,15 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node);
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qavg_pool_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::QuantizedAvgPool>(
node, false);
// QuantizedAvgPool needs 3 primitives: input, result, and pooling_forward.
size_t qavg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
auto functor = [&, qavg_pool_index](CPURuntimeContext* ctx,
auto functor = [&, qavg_pool_desc, qavg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qavg_pool_desc, qavg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index);
......
......@@ -48,8 +48,7 @@ namespace ngraph
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>(
node, args, out);
->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>(node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node);
......@@ -68,7 +67,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>(
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......@@ -101,7 +100,7 @@ namespace ngraph
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionRelu>(
node, args, out);
node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>(
......@@ -119,7 +118,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>(
mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......@@ -154,7 +153,7 @@ namespace ngraph
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBias>(
node, args, out);
node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>(
......@@ -172,7 +171,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>(
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......@@ -213,7 +212,7 @@ namespace ngraph
auto conv_desc =
mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out);
node);
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>(
......@@ -259,7 +258,7 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
conv_attr.set_post_ops(new_pops);
mkldnn_emitter->convolution_forward<true>(
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
......@@ -305,7 +304,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc = mkldnn_emitter->get_convolution_forward_desc<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node, args, out);
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
auto conv_attr = mkldnn_emitter->get_convolution_forward_attr<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
......@@ -349,7 +348,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>(
mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
......
......@@ -35,16 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node);
auto qmax_pool_desc =
mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::QuantizedMaxPool>(
node, false);
// QuantizedMaxPool needs 3 primitives: input, result, and pooling_forward.
size_t qmax_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
auto functor = [&, qmax_pool_index](CPURuntimeContext* ctx,
auto functor = [&, qmax_pool_desc, qmax_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qmax_pool_desc, qmax_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index);
......
......@@ -40,15 +40,17 @@ namespace ngraph
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t relu_index = mkldnn_emitter->build_relu_forward(input_desc, result_desc);
auto relu_desc = mkldnn_emitter->get_relu_forward_desc(node);
// Relu needs 3 primitives: input, result, and eltwise_forward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx,
auto functor = [&, relu_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_forward(relu_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index);
......@@ -74,16 +76,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t relu_index =
mkldnn_emitter->build_relu_backward(input_desc, delta_desc, result_desc);
auto bwd_desc = mkldnn_emitter->get_relu_backward_desc(node);
auto fwd_desc = mkldnn_emitter->get_relu_forward_desc(node);
// ReluBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx,
auto functor = [&, bwd_desc, fwd_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_backward(bwd_desc, fwd_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_fwd_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
......@@ -49,9 +49,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_index = mkldnn_emitter->build_rnn<ngraph::op::Rnn>(node, args, out);
auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
                // Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter,
                // bias, dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto rnn_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
auto functor = [&, rnn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
auto functor = [&, rnn_desc, rnn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(rnn_desc, rnn_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
......@@ -42,15 +42,17 @@ namespace ngraph
auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto sigmoid_index = mkldnn_emitter->build_sigmoid_forward(input_desc, out_desc);
auto sigmoid_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, false);
// Sigmoid needs 3 primitives: input, result, and eltwise_forward.
auto sigmoid_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx,
auto functor = [&, sigmoid_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_forward(sigmoid_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index);
......@@ -72,17 +74,18 @@ namespace ngraph
auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_backward(input_desc, delta_desc, out_desc);
auto fwd_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, true);
auto bwd_desc = mkldnn_emitter->get_sigmoid_backward_desc(node);
// SigmoidBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t sigmoid_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx,
auto functor = [&, bwd_desc, fwd_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_backward(bwd_desc, fwd_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
......@@ -84,13 +84,18 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto slice_index = mkldnn_emitter->build_slice(
input_desc, result_desc, lower_bounds, out_shape);
// Slice needs 3 primitives: input, result, and reorder.
auto slice_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(slice_index);
auto functor = [&, slice_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor =
[&, input_desc, result_desc, lower_bounds, out_shape, slice_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_slice(
input_desc, result_desc, lower_bounds, out_shape, slice_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index);
......
......@@ -46,23 +46,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
if (axes.size() != 1)
{
throw ngraph_error("MKLDNN supports softmax only across single axis");
}
int softmax_axis = static_cast<int>(*(axes.begin()));
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t softmax_index = mkldnn_emitter->build_softmax_forward(
input_desc, result_desc, softmax_axis);
auto softmax_desc = mkldnn_emitter->get_softmax_forward_desc(node);
// Softmax needs 3 primitives: input, result, and softmax_forward.
size_t softmax_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index);
auto functor = [&, softmax_index](CPURuntimeContext* ctx,
auto functor = [&, softmax_desc, softmax_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_softmax_forward(softmax_desc, softmax_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index);
......
......@@ -131,6 +131,11 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
return index;
}
// Constructs an mkldnn::memory primitive for `desc` in the pre-reserved slot
// `index` of m_mkldnn_primitives. The data handle is deliberately null here;
// the actual tensor pointer is bound at run time via set_memory_ptr.
void MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc, size_t index)
{
    mkldnn::memory::primitive_desc pd{desc, executor::global_cpu_engine};
    m_mkldnn_primitives[index] = new mkldnn::memory(pd, nullptr);
}
size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales)
......@@ -149,6 +154,27 @@ size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_d
return primitive_index;
}
// Builds the reorder primitive that performs (de)quantization into the slot
// reserved at `quantize_index`. Output values are scaled by `scales` under
// `mask` (0 = single scale, non-zero = per-dimension scales) and rounded to
// nearest. Input/result memory primitives are built into the dependency
// slots recorded when the space was reserved.
void MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
                                           const mkldnn::memory::desc& result_desc,
                                           const std::vector<float>& scales,
                                           size_t quantize_index,
                                           const int mask)
{
    const size_t src_index = m_primitive_deps[quantize_index][0];
    const size_t dst_index = m_primitive_deps[quantize_index][1];
    build_memory_primitive(input_desc, src_index);
    build_memory_primitive(result_desc, dst_index);

    mkldnn::primitive_attr attr;
    attr.set_output_scales(mask, scales);
    attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);

    mkldnn::reorder::primitive_desc reorder_pd{{input_desc, executor::global_cpu_engine},
                                               {result_desc, executor::global_cpu_engine},
                                               attr};
    m_mkldnn_primitives[quantize_index] = new mkldnn::reorder(
        reorder_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
......@@ -478,6 +504,33 @@ size_t MKLDNNEmitter::build_convolution_backward_weights_bias(
return conv_index;
}
// Builds the convolution backward-weights(+bias) primitive into the slot
// reserved at `conv_index`. Memory primitives for src, diff_dst,
// diff_weights, and diff_bias are built into the four dependency slots;
// `fwd_desc` supplies the forward primitive descriptor MKLDNN requires as a
// hint when constructing the backward primitive descriptor.
void MKLDNNEmitter::build_convolution_backward_weights_bias(
    const mkldnn::convolution_backward_weights::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    auto& deps = m_primitive_deps[conv_index];
    build_memory_primitive(bwd_desc.data.src_desc, deps[0]);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, deps[1]);
    build_memory_primitive(bwd_desc.data.diff_weights_desc, deps[2]);
    build_memory_primitive(bwd_desc.data.diff_bias_desc, deps[3]);

    m_mkldnn_primitives[conv_index] = new mkldnn::convolution_backward_weights(
        {bwd_desc,
         executor::global_cpu_engine,
         // Forward primitive descriptor corresponding to this backward weights descriptor
         {fwd_desc, executor::global_cpu_engine}},
        *m_mkldnn_primitives[deps[0]],
        *m_mkldnn_primitives[deps[1]],
        *m_mkldnn_primitives[deps[2]],
        *m_mkldnn_primitives[deps[3]]);
}
size_t
MKLDNNEmitter::build_convolution_backward_weights(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
......@@ -522,6 +575,28 @@ size_t
return primitive_index;
}
// Builds the convolution backward-weights primitive (no bias) into the slot
// reserved at `conv_index`. src, diff_dst, and diff_weights memory
// primitives are built into the three dependency slots recorded at
// reservation time.
void MKLDNNEmitter::build_convolution_backward_weights(
    const mkldnn::convolution_backward_weights::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    const size_t src_index = m_primitive_deps[conv_index][0];
    const size_t diff_dst_index = m_primitive_deps[conv_index][1];
    const size_t diff_weights_index = m_primitive_deps[conv_index][2];
    build_memory_primitive(bwd_desc.data.src_desc, src_index);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, diff_dst_index);
    build_memory_primitive(bwd_desc.data.diff_weights_desc, diff_weights_index);

    // MKLDNN requires the matching forward primitive descriptor as a hint
    // when building the backward-weights primitive descriptor.
    mkldnn::convolution_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::convolution_backward_weights::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[conv_index] =
        new mkldnn::convolution_backward_weights(bwd_pd,
                                                 *m_mkldnn_primitives[src_index],
                                                 *m_mkldnn_primitives[diff_dst_index],
                                                 *m_mkldnn_primitives[diff_weights_index]);
}
size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc,
......@@ -565,6 +640,28 @@ size_t MKLDNNEmitter::build_convolution_backward_data(const mkldnn::memory::desc
return primitive_index;
}
// Builds the convolution backward-data primitive into the slot reserved at
// `conv_index`. weights, diff_dst, and diff_src memory primitives are built
// into the three dependency slots recorded at reservation time.
void MKLDNNEmitter::build_convolution_backward_data(
    const mkldnn::convolution_backward_data::desc& bwd_desc,
    const mkldnn::convolution_forward::desc& fwd_desc,
    size_t conv_index)
{
    const size_t weights_index = m_primitive_deps[conv_index][0];
    const size_t diff_dst_index = m_primitive_deps[conv_index][1];
    const size_t diff_src_index = m_primitive_deps[conv_index][2];
    build_memory_primitive(bwd_desc.data.weights_desc, weights_index);
    build_memory_primitive(bwd_desc.data.diff_dst_desc, diff_dst_index);
    build_memory_primitive(bwd_desc.data.diff_src_desc, diff_src_index);

    // MKLDNN requires the matching forward primitive descriptor as a hint
    // when building the backward-data primitive descriptor.
    mkldnn::convolution_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::convolution_backward_data::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    // Primitive argument order is (diff_dst, weights, diff_src).
    m_mkldnn_primitives[conv_index] =
        new mkldnn::convolution_backward_data(bwd_pd,
                                              *m_mkldnn_primitives[diff_dst_index],
                                              *m_mkldnn_primitives[weights_index],
                                              *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
......@@ -594,6 +691,20 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
return primitive_index;
}
void MKLDNNEmitter::build_pooling_forward(const mkldnn::pooling_forward::desc& pool_desc,
                                          size_t pool_index)
{
    // Deps layout: [0] = input, [1] = result.
    const size_t src_index = m_primitive_deps[pool_index][0];
    build_memory_primitive(pool_desc.data.src_desc, src_index);
    const size_t dst_index = m_primitive_deps[pool_index][1];
    build_memory_primitive(pool_desc.data.dst_desc, dst_index);

    mkldnn::pooling_forward::primitive_desc pool_pd{pool_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[pool_index] = new mkldnn::pooling_forward(
        pool_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc,
const mkldnn::memory::desc& diff_src_desc,
......@@ -632,6 +743,24 @@ size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm
return primitive_index;
}
void MKLDNNEmitter::build_pooling_backward(const mkldnn::pooling_backward::desc& pool_desc,
                                           const mkldnn::pooling_forward::desc& pool_fwd_desc,
                                           size_t pool_index)
{
    // Deps layout: [0] = diff_dst input, [1] = diff_src result.
    const size_t diff_dst_index = m_primitive_deps[pool_index][0];
    build_memory_primitive(pool_desc.data.diff_dst_desc, diff_dst_index);
    const size_t diff_src_index = m_primitive_deps[pool_index][1];
    build_memory_primitive(pool_desc.data.diff_src_desc, diff_src_index);

    // The backward pooling primitive descriptor needs the matching forward
    // primitive descriptor as a hint.
    m_mkldnn_primitives[pool_index] =
        new mkldnn::pooling_backward({pool_desc,
                                      executor::global_cpu_engine,
                                      {pool_fwd_desc, executor::global_cpu_engine}},
                                     *m_mkldnn_primitives[diff_dst_index],
                                     *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& fprop_src_desc,
const mkldnn::memory::desc& diff_dst_desc,
......@@ -693,6 +822,48 @@ size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algor
return bwd_primitive_index;
}
void MKLDNNEmitter::build_max_pooling_backward(const mkldnn::pooling_backward::desc& bwd_pool_desc,
                                               const mkldnn::pooling_forward::desc& fwd_pool_desc,
                                               const mkldnn::memory::desc& fprop_src_desc,
                                               size_t fwd_pool_index,
                                               size_t bwd_pool_index)
{
    // Max-pool backprop re-runs the forward pass to regenerate the workspace
    // (the argmax locations), so this builds TWO primitives: the forward
    // pooling at fwd_pool_index and the backward pooling at bwd_pool_index,
    // wiring their shared buffers through m_primitive_deps.
    size_t fprop_src_index = m_primitive_deps[fwd_pool_index][0];
    build_memory_primitive(fprop_src_desc, fprop_src_index);
    size_t diff_dst_index = m_primitive_deps[bwd_pool_index][0];
    build_memory_primitive(bwd_pool_desc.data.diff_dst_desc, diff_dst_index);
    size_t diff_src_index = m_primitive_deps[fwd_pool_index][1];
    build_memory_primitive(bwd_pool_desc.data.diff_src_desc, diff_src_index);
    // Record diff_src as the backward primitive's output dependency.
    m_primitive_deps[bwd_pool_index][2] = diff_src_index;

    mkldnn::pooling_forward::primitive_desc fwd_pd{fwd_pool_desc, executor::global_cpu_engine};

    // NOTE(review): ws_index aliases diff_src_index — both read
    // m_primitive_deps[fwd_pool_index][1] — so the workspace memory primitive
    // built here overwrites the diff_src one above. Looks like this should be
    // deps[fwd_pool_index][2]; confirm against the callers that reserved the
    // dependency slots.
    size_t ws_index = m_primitive_deps[fwd_pool_index][1];
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), ws_index);
    // Backward primitive consumes the same workspace.
    m_primitive_deps[bwd_pool_index][1] = ws_index;

    // Allocate workspace
    // TODO (jbobba): Might need to align memory
    auto ws = std::unique_ptr<MKLDNNWorkspace>(
        new MKLDNNWorkspace(fwd_pd.workspace_primitive_desc().get_size()));
    auto ws_buf_index = insert_workspace(ws);
    // The workspace buffer is shared between the two primitives.
    m_primitive_deps[fwd_pool_index][3] = ws_buf_index;
    m_primitive_deps[bwd_pool_index][3] = ws_buf_index;

    m_mkldnn_primitives[fwd_pool_index] = new mkldnn::pooling_forward(
        fwd_pd,
        *m_mkldnn_primitives[fprop_src_index],
        *m_mkldnn_primitives
            [diff_src_index], // HACK - Uses diff_src buffer. Safe since diff_src > fprop_dst
        *m_mkldnn_primitives[ws_index]);

    m_mkldnn_primitives[bwd_pool_index] =
        new mkldnn::pooling_backward({bwd_pool_desc, executor::global_cpu_engine, fwd_pd},
                                     *m_mkldnn_primitives[diff_dst_index],
                                     *m_mkldnn_primitives[ws_index],
                                     *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& src_desc,
const mkldnn::memory::desc& dst_desc,
......@@ -728,6 +899,26 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_forward(mkldnn::algorithm p
return fwd_primitive_index;
}
void MKLDNNEmitter::build_max_pooling_with_indices_forward(
    const mkldnn::pooling_forward::desc& max_pool_desc, size_t max_pool_index)
{
    // Deps layout: [0] = src, [1] = dst, [2] = workspace (max indices).
    const size_t src_index = m_primitive_deps[max_pool_index][0];
    build_memory_primitive(max_pool_desc.data.src_desc, src_index);
    const size_t dst_index = m_primitive_deps[max_pool_index][1];
    build_memory_primitive(max_pool_desc.data.dst_desc, dst_index);

    // The primitive descriptor supplies the workspace memory layout.
    mkldnn::pooling_forward::primitive_desc fwd_pd{max_pool_desc, executor::global_cpu_engine};
    const size_t ws_index = m_primitive_deps[max_pool_index][2];
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), ws_index);

    m_mkldnn_primitives[max_pool_index] = new mkldnn::pooling_forward(
        fwd_pd,
        *m_mkldnn_primitives[src_index],
        *m_mkldnn_primitives[dst_index],
        *m_mkldnn_primitives[ws_index]);
}
size_t MKLDNNEmitter::build_max_pooling_with_indices_backward(
mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc,
......@@ -773,6 +964,28 @@ size_t MKLDNNEmitter::build_max_pooling_with_indices_backward(
return bwd_primitive_index;
}
void MKLDNNEmitter::build_max_pooling_with_indices_backward(
    const mkldnn::pooling_backward::desc& bwd_pool_desc,
    const mkldnn::pooling_forward::desc& fwd_pool_desc,
    size_t max_pool_index)
{
    // Deps layout: [0] = diff_dst, [1] = forward workspace (indices), [2] = diff_src.
    const size_t diff_dst_index = m_primitive_deps[max_pool_index][0];
    build_memory_primitive(bwd_pool_desc.data.diff_dst_desc, diff_dst_index);
    const size_t diff_src_index = m_primitive_deps[max_pool_index][2];
    build_memory_primitive(bwd_pool_desc.data.diff_src_desc, diff_src_index);

    // The forward primitive descriptor is both the hint for the backward pd
    // and the source of the workspace memory layout.
    mkldnn::pooling_forward::primitive_desc fwd_pd{fwd_pool_desc, executor::global_cpu_engine};
    const size_t fprop_ws_index = m_primitive_deps[max_pool_index][1];
    build_memory_primitive(fwd_pd.workspace_primitive_desc().desc(), fprop_ws_index);

    mkldnn::pooling_backward::primitive_desc bwd_pd{
        bwd_pool_desc, executor::global_cpu_engine, fwd_pd};
    m_mkldnn_primitives[max_pool_index] =
        new mkldnn::pooling_backward(bwd_pd,
                                     *m_mkldnn_primitives[diff_dst_index],
                                     *m_mkldnn_primitives[fprop_ws_index],
                                     *m_mkldnn_primitives[diff_src_index]);
}
size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
{
......@@ -794,6 +1007,19 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
void MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
                                  const mkldnn::memory::desc& result_desc,
                                  size_t reorder_index)
{
    // Deps layout: [0] = input, [1] = result.
    const size_t src_index = m_primitive_deps[reorder_index][0];
    build_memory_primitive(input_desc, src_index);
    const size_t dst_index = m_primitive_deps[reorder_index][1];
    build_memory_primitive(result_desc, dst_index);

    m_mkldnn_primitives[reorder_index] =
        new mkldnn::reorder(*m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
......@@ -820,6 +1046,39 @@ size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
mkldnn::lrn_forward::desc MKLDNNEmitter::get_lrn_forward_desc(const ngraph::Node* node)
{
    // Translate the LRN op's hyper-parameters into an MKLDNN LRN descriptor
    // (cross-channel normalization, inference/scoring propagation).
    auto lrn_op = static_cast<const ngraph::op::LRN*>(node);
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring,
                                     mkldnn::algorithm::lrn_across_channels,
                                     src_desc,
                                     static_cast<int>(lrn_op->get_nsize()),
                                     static_cast<float>(lrn_op->get_alpha()),
                                     static_cast<float>(lrn_op->get_beta()),
                                     static_cast<float>(lrn_op->get_bias()));
}
void MKLDNNEmitter::build_lrn_forward(const mkldnn::lrn_forward::desc& lrn_desc, size_t lrn_index)
{
    // Deps layout: [0] = input, [1] = result (LRN output has the same desc as input).
    const size_t src_index = m_primitive_deps[lrn_index][0];
    build_memory_primitive(lrn_desc.data.data_desc, src_index);
    const size_t dst_index = m_primitive_deps[lrn_index][1];
    build_memory_primitive(lrn_desc.data.data_desc, dst_index);

    m_mkldnn_primitives[lrn_index] =
        new mkldnn::lrn_forward({lrn_desc, executor::global_cpu_engine},
                                *m_mkldnn_primitives[src_index],
                                *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
{
......@@ -838,6 +1097,30 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_relu_forward_desc(const ngraph::Node* node)
{
    // Plain ReLU: eltwise_relu with a zero negative slope.
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::eltwise_forward::desc(
        mkldnn::prop_kind::forward, mkldnn::algorithm::eltwise_relu, src_desc, 0.0f);
}
void MKLDNNEmitter::build_relu_forward(const mkldnn::eltwise_forward::desc& relu_desc,
                                       size_t relu_index)
{
    // Deps layout: [0] = input, [1] = result (same desc for both).
    const size_t src_index = m_primitive_deps[relu_index][0];
    build_memory_primitive(relu_desc.data.data_desc, src_index);
    const size_t dst_index = m_primitive_deps[relu_index][1];
    build_memory_primitive(relu_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc relu_pd{relu_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[relu_index] = new mkldnn::eltwise_forward(
        relu_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc)
......@@ -868,6 +1151,41 @@ size_t MKLDNNEmitter::build_relu_backward(const mkldnn::memory::desc& input_desc
return primitive_index;
}
mkldnn::eltwise_backward::desc MKLDNNEmitter::get_relu_backward_desc(const ngraph::Node* node)
{
    // ReLU backprop descriptor: diff desc comes from the output md, data desc
    // from input 0; negative slope is zero for a plain ReLU.
    auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto diff_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
    return mkldnn::eltwise_backward::desc(
        mkldnn::algorithm::eltwise_relu, diff_desc, data_desc, 0.0f);
}
void MKLDNNEmitter::build_relu_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
                                        const mkldnn::eltwise_forward::desc& fwd_desc,
                                        size_t relu_index)
{
    // Deps layout: [0] = input, [1] = delta, [2] = result.
    const size_t src_index = m_primitive_deps[relu_index][0];
    build_memory_primitive(bwd_desc.data.data_desc, src_index);
    const size_t delta_index = m_primitive_deps[relu_index][1];
    build_memory_primitive(bwd_desc.data.diff_data_desc, delta_index);
    const size_t dst_index = m_primitive_deps[relu_index][2];
    build_memory_primitive(bwd_desc.data.data_desc, dst_index);

    // The backward pd requires the corresponding forward pd as a hint.
    mkldnn::eltwise_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::eltwise_backward::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[relu_index] =
        new mkldnn::eltwise_backward(bwd_pd,
                                     *m_mkldnn_primitives[src_index],
                                     *m_mkldnn_primitives[delta_index],
                                     *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
{
......@@ -888,6 +1206,40 @@ size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_de
return primitive_index;
}
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_sigmoid_forward_desc(const ngraph::Node* node,
                                                                      bool backward_op)
{
    // When building the hint for a backward pass, plain `forward` suffices;
    // the standalone forward op uses `forward_training`.
    const auto kind =
        backward_op ? mkldnn::prop_kind::forward : mkldnn::prop_kind::forward_training;
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::eltwise_forward::desc(
        kind, mkldnn::algorithm::eltwise_logistic, src_desc, 0, 0);
}
void MKLDNNEmitter::build_sigmoid_forward(const mkldnn::eltwise_forward::desc& sigmoid_desc,
                                          size_t sigmoid_index)
{
    // Deps layout: [0] = input, [1] = result (same desc for both).
    const size_t src_index = m_primitive_deps[sigmoid_index][0];
    build_memory_primitive(sigmoid_desc.data.data_desc, src_index);
    const size_t dst_index = m_primitive_deps[sigmoid_index][1];
    build_memory_primitive(sigmoid_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc fwd_pd{sigmoid_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[sigmoid_index] = new mkldnn::eltwise_forward(
        fwd_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc)
......@@ -914,6 +1266,37 @@ size_t MKLDNNEmitter::build_sigmoid_backward(const mkldnn::memory::desc& input_d
return primitive_index;
}
mkldnn::eltwise_backward::desc MKLDNNEmitter::get_sigmoid_backward_desc(const ngraph::Node* node)
{
    // Sigmoid backprop: input 0 is the forward input, input 1 the delta.
    auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto diff_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    return mkldnn::eltwise_backward::desc(
        mkldnn::algorithm::eltwise_logistic, diff_desc, data_desc, 0, 0);
}
void MKLDNNEmitter::build_sigmoid_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
                                           const mkldnn::eltwise_forward::desc& fwd_desc,
                                           size_t sigmoid_index)
{
    // Deps layout: [0] = input, [1] = delta, [2] = result.
    const size_t src_index = m_primitive_deps[sigmoid_index][0];
    build_memory_primitive(bwd_desc.data.data_desc, src_index);
    const size_t delta_index = m_primitive_deps[sigmoid_index][1];
    build_memory_primitive(bwd_desc.data.diff_data_desc, delta_index);
    const size_t dst_index = m_primitive_deps[sigmoid_index][2];
    build_memory_primitive(bwd_desc.data.data_desc, dst_index);

    // The backward pd requires the corresponding forward pd as a hint.
    mkldnn::eltwise_forward::primitive_desc fwd_pd{fwd_desc, executor::global_cpu_engine};
    mkldnn::eltwise_backward::primitive_desc bwd_pd{
        bwd_desc, executor::global_cpu_engine, fwd_pd};

    m_mkldnn_primitives[sigmoid_index] =
        new mkldnn::eltwise_backward(bwd_pd,
                                     *m_mkldnn_primitives[src_index],
                                     *m_mkldnn_primitives[delta_index],
                                     *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
......@@ -942,6 +1325,45 @@ size_t MKLDNNEmitter::build_elementwise_add(
return add_index;
}
mkldnn::sum::primitive_desc MKLDNNEmitter::get_elementwise_add_desc(const ngraph::Node* node)
{
    // Elementwise add is expressed as an MKLDNN sum of two inputs with unit scales.
    std::vector<float> scales(2, 1.0f);
    std::vector<mkldnn::memory::primitive_desc> input_pds;
    for (size_t i = 0; i < 2; i++)
    {
        input_pds.emplace_back(mkldnn_utils::get_input_mkldnn_md(node, i),
                               ngraph::runtime::cpu::executor::global_cpu_engine);
    }
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
    return mkldnn::sum::primitive_desc(result_desc, scales, input_pds);
}
void MKLDNNEmitter::build_elementwise_add(const mkldnn::sum::primitive_desc& sum_pd,
                                          size_t add_index)
{
    // Deps layout: [0] = input0, [1] = input1, [2] = result.
    std::vector<mkldnn::memory::primitive::at> inputs_primitive;

    size_t input0_data_index = m_primitive_deps[add_index][0];
    // NOTE(review): the sum's dst memory desc is reused for both input memory
    // primitives as well as the result — presumably safe because elementwise
    // add here uses identical layouts for inputs and output; confirm against
    // the descriptor built in get_elementwise_add_desc.
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), input0_data_index);
    size_t input1_data_index = m_primitive_deps[add_index][1];
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), input1_data_index);
    size_t result_index = m_primitive_deps[add_index][2];
    build_memory_primitive(sum_pd.dst_primitive_desc().desc(), result_index);

    inputs_primitive.push_back(*m_mkldnn_primitives[input0_data_index]);
    inputs_primitive.push_back(*m_mkldnn_primitives[input1_data_index]);

    // sum primitive
    m_mkldnn_primitives[add_index] =
        new mkldnn::sum(sum_pd, inputs_primitive, *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
......@@ -1002,6 +1424,60 @@ size_t MKLDNNEmitter::build_batchnorm_forward(const mkldnn::memory::desc& input_
}
}
void MKLDNNEmitter::build_batchnorm_forward(
    const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
    const mkldnn::memory::desc& weights_desc,
    bool bn_training_flag,
    size_t batchnorm_index,
    const mkldnn::post_ops& pops)
{
    // Builds either the training-mode primitive (mean/variance are OUTPUTS)
    // or the inference/global-stats primitive (mean/variance are INPUTS).
    // The dependency-index layout differs between the two branches and must
    // match how the caller reserved the slots.
    size_t input_index = m_primitive_deps[batchnorm_index][0];
    build_memory_primitive(batchnorm_desc.data.data_desc, input_index);

    mkldnn::primitive_attr bn_attr;
    bn_attr.set_post_ops(pops);

    // Bit 0 of the descriptor flags — presumably
    // mkldnn::batch_normalization_flag::use_global_stats; confirm against the
    // mkldnn flag enum.
    auto use_global_stats = batchnorm_desc.data.flags & 0x1U;
    if (bn_training_flag && !use_global_stats)
    {
        // Training deps: [1] = weights, [2] = result, [3] = mean (out),
        // [4] = variance (out).
        size_t weights_index = m_primitive_deps[batchnorm_index][1];
        build_memory_primitive(weights_desc, weights_index);
        size_t result_index = m_primitive_deps[batchnorm_index][2];
        build_memory_primitive(batchnorm_desc.data.data_desc, result_index);
        size_t mean_index = m_primitive_deps[batchnorm_index][3];
        build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
        size_t variance_index = m_primitive_deps[batchnorm_index][4];
        build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);

        m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_forward(
            {batchnorm_desc, bn_attr, executor::global_cpu_engine},
            mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
            static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]),
            *m_mkldnn_primitives[mean_index],
            *m_mkldnn_primitives[variance_index]);
    }
    else
    {
        // Inference/global-stats deps: [1] = mean (in), [2] = variance (in),
        // [3] = weights, [4] = result.
        size_t weights_index = m_primitive_deps[batchnorm_index][3];
        build_memory_primitive(weights_desc, weights_index);
        size_t result_index = m_primitive_deps[batchnorm_index][4];
        build_memory_primitive(batchnorm_desc.data.data_desc, result_index);
        size_t mean_index = m_primitive_deps[batchnorm_index][1];
        build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
        size_t variance_index = m_primitive_deps[batchnorm_index][2];
        build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);

        m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_forward(
            {batchnorm_desc, bn_attr, executor::global_cpu_engine},
            mkldnn::primitive::at(*m_mkldnn_primitives[input_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[mean_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[variance_index]),
            mkldnn::primitive::at(*m_mkldnn_primitives[weights_index]),
            static_cast<mkldnn::memory>(*m_mkldnn_primitives[result_index]));
    }
}
size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
......@@ -1049,6 +1525,62 @@ size_t MKLDNNEmitter::build_batchnorm_backward(const mkldnn::memory::desc& weigh
return batchnorm_index;
}
mkldnn::batch_normalization_backward::desc
    MKLDNNEmitter::get_batchnorm_backward_desc(const ngraph::Node* node)
{
    // Input 2 is the forward input tensor; input 5 is the incoming delta.
    auto bn_op = static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
    auto data_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
    auto diff_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
    return mkldnn::batch_normalization_backward::desc(
        mkldnn::prop_kind::backward,
        diff_desc,
        data_desc,
        bn_op->get_eps_value(),
        mkldnn::batch_normalization_flag::use_scale_shift);
}
void MKLDNNEmitter::build_batchnorm_backward(
    const mkldnn::batch_normalization_backward::desc& batchnorm_desc,
    const mkldnn::memory::desc& weights_desc,
    const mkldnn::memory::desc& dweights_desc,
    size_t batchnorm_index)
{
    // Deps layout: [0] = weights, [1] = input, [2] = mean, [3] = variance,
    // [4] = delta, [5] = dinput (output), [6] = dweights (output).
    size_t weights_index = m_primitive_deps[batchnorm_index][0];
    build_memory_primitive(weights_desc, weights_index);
    size_t input_index = m_primitive_deps[batchnorm_index][1];
    build_memory_primitive(batchnorm_desc.data.data_desc, input_index);
    size_t mean_index = m_primitive_deps[batchnorm_index][2];
    build_memory_primitive(batchnorm_desc.data.mean_desc, mean_index);
    size_t variance_index = m_primitive_deps[batchnorm_index][3];
    build_memory_primitive(batchnorm_desc.data.variance_desc, variance_index);
    size_t delta_index = m_primitive_deps[batchnorm_index][4];
    build_memory_primitive(batchnorm_desc.data.diff_data_desc, delta_index);
    size_t dinput_index = m_primitive_deps[batchnorm_index][5];
    build_memory_primitive(batchnorm_desc.data.data_desc, dinput_index);
    size_t dweights_index = m_primitive_deps[batchnorm_index][6];
    build_memory_primitive(dweights_desc, dweights_index);

    // The backward primitive descriptor is brace-constructed with an inline
    // forward-training primitive descriptor as its hint; the forward
    // descriptor is rebuilt from the backward descriptor's data desc and
    // epsilon so the two stay consistent.
    m_mkldnn_primitives[batchnorm_index] = new mkldnn::batch_normalization_backward(
        {batchnorm_desc,
         executor::global_cpu_engine,
         {{mkldnn::prop_kind::forward_training,
           batchnorm_desc.data.data_desc,
           static_cast<double>(batchnorm_desc.data.batch_norm_epsilon),
           mkldnn::batch_normalization_flag::use_scale_shift},
          executor::global_cpu_engine}},
        *m_mkldnn_primitives[input_index],
        *m_mkldnn_primitives[mean_index],
        *m_mkldnn_primitives[variance_index],
        *m_mkldnn_primitives[delta_index],
        *m_mkldnn_primitives[weights_index],
        *m_mkldnn_primitives[dinput_index],
        *m_mkldnn_primitives[dweights_index]);
}
size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_desc,
const mkldnn::memory::desc& src_iter_desc,
const mkldnn::memory::desc& weights_layer_desc,
......@@ -1085,6 +1617,7 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de
build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc());
auto workspace = std::unique_ptr<MKLDNNWorkspace>(
new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
auto workspace_buf_index = insert_workspace(workspace);
size_t rnn_index = insert_primitive(new mkldnn::rnn_forward(
......@@ -1110,6 +1643,44 @@ size_t MKLDNNEmitter::build_rnn_forward(const mkldnn::memory::desc& src_layer_de
return rnn_index;
}
void MKLDNNEmitter::build_rnn_forward(const mkldnn::rnn_forward::desc& rnn_desc, size_t rnn_index)
{
    // Deps layout: [0] = src_layer, [1] = src_iter, [2] = weights_layer,
    // [3] = weights_iter, [4] = bias, [5] = dst_layer, [6] = dst_iter,
    // [7] = workspace memory primitive, [8] = workspace buffer index.
    size_t src_layer_index = m_primitive_deps[rnn_index][0];
    build_memory_primitive(rnn_desc.data.src_layer_desc, src_layer_index);
    size_t src_iter_index = m_primitive_deps[rnn_index][1];
    build_memory_primitive(rnn_desc.data.src_iter_desc, src_iter_index);
    size_t weights_layer_index = m_primitive_deps[rnn_index][2];
    build_memory_primitive(rnn_desc.data.weights_layer_desc, weights_layer_index);
    size_t weights_iter_index = m_primitive_deps[rnn_index][3];
    build_memory_primitive(rnn_desc.data.weights_iter_desc, weights_iter_index);
    size_t bias_index = m_primitive_deps[rnn_index][4];
    build_memory_primitive(rnn_desc.data.bias_desc, bias_index);
    size_t dst_layer_index = m_primitive_deps[rnn_index][5];
    build_memory_primitive(rnn_desc.data.dst_layer_desc, dst_layer_index);
    size_t dst_iter_index = m_primitive_deps[rnn_index][6];
    build_memory_primitive(rnn_desc.data.dst_iter_desc, dst_iter_index);

    // The primitive descriptor provides the workspace layout and size.
    auto rnn_layer_prim_desc =
        mkldnn::rnn_forward::primitive_desc(rnn_desc, executor::global_cpu_engine);
    size_t workspace_index = m_primitive_deps[rnn_index][7];
    build_memory_primitive(rnn_layer_prim_desc.workspace_primitive_desc().desc(), workspace_index);
    // Allocate the backing workspace buffer and record its index in the
    // placeholder dep slot.
    auto workspace = std::unique_ptr<MKLDNNWorkspace>(
        new MKLDNNWorkspace(rnn_layer_prim_desc.workspace_primitive_desc().get_size()));
    auto workspace_buf_index = insert_workspace(workspace);
    m_primitive_deps[rnn_index][8] = workspace_buf_index;

    m_mkldnn_primitives[rnn_index] =
        new mkldnn::rnn_forward(rnn_layer_prim_desc,
                                mkldnn::primitive::at(*m_mkldnn_primitives[src_layer_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[src_iter_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[weights_layer_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[weights_iter_index]),
                                mkldnn::primitive::at(*m_mkldnn_primitives[bias_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_layer_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[dst_iter_index]),
                                static_cast<mkldnn::memory>(*m_mkldnn_primitives[workspace_index]));
}
size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
const mkldnn::memory::desc& result_desc,
const size_t concat_dim)
......@@ -1148,6 +1719,53 @@ size_t MKLDNNEmitter::build_concat(const std::vector<mkldnn::memory::desc>& inpu
return concat_index;
}
mkldnn::concat::primitive_desc MKLDNNEmitter::get_concat_desc(const ngraph::Node* node,
                                                              size_t nargs)
{
    // Collect one memory primitive descriptor per input argument.
    auto concat_op = static_cast<const ngraph::op::Concat*>(node);
    std::vector<mkldnn::memory::primitive_desc> input_pds;
    for (size_t i = 0; i < nargs; i++)
    {
        input_pds.emplace_back(mkldnn_utils::get_input_mkldnn_md(node, i),
                               runtime::cpu::executor::global_cpu_engine);
    }

    // The concatenation axis comes from the op; MKLDNN expects it as an int.
    return mkldnn::concat::primitive_desc(
        mkldnn_utils::get_output_mkldnn_md(node, 0),
        static_cast<int>(concat_op->get_concatenation_axis()),
        input_pds);
}
void MKLDNNEmitter::build_concat(const mkldnn::concat::primitive_desc& concat_pd,
                                 const std::vector<mkldnn::memory::desc>& inputs_data_desc,
                                 size_t concat_index)
{
    // Deps layout: [0 .. n-1] = inputs, [n] = result.
    // Fix: the original also built a vector of input memory primitive
    // descriptors (inputs_pd) in a separate loop that was never read —
    // dead work removed.
    std::vector<mkldnn::memory::primitive::at> inputs_primitive;
    for (size_t i = 0; i < inputs_data_desc.size(); i++)
    {
        size_t inputs_data_index = m_primitive_deps[concat_index][i];
        build_memory_primitive(inputs_data_desc[i], inputs_data_index);
        inputs_primitive.push_back(*m_mkldnn_primitives[inputs_data_index]);
    }
    size_t result_index = m_primitive_deps[concat_index][inputs_data_desc.size()];
    build_memory_primitive(concat_pd.dst_primitive_desc().desc(), result_index);

    // concat primitive
    m_mkldnn_primitives[concat_index] =
        new mkldnn::concat(concat_pd, inputs_primitive, *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds,
......@@ -1179,6 +1797,35 @@ size_t MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
return reorder_index;
}
void MKLDNNEmitter::build_slice(const mkldnn::memory::desc& input_desc,
                                const mkldnn::memory::desc& result_desc,
                                const ngraph::Coordinate& lower_bounds,
                                const ngraph::Shape& result_shape,
                                size_t slice_index)
{
    // Slice is implemented as a reorder from a sub-view of the input into the
    // result buffer. Deps layout: [0] = input, [1] = result.
    // Fix: removed the unused local `std::vector<size_t> in_out_index` that
    // was declared but never read or written.
    mkldnn::memory::primitive_desc input_pd =
        mkldnn::memory::primitive_desc(input_desc, runtime::cpu::executor::global_cpu_engine);
    size_t input_index = m_primitive_deps[slice_index][0];
    build_memory_primitive(input_desc, input_index);

    // Describe the sub-tensor selected by lower_bounds / result_shape.
    auto dims = mkldnn::memory::dims(result_shape.begin(), result_shape.end());
    auto offsets = mkldnn::memory::dims(lower_bounds.begin(), lower_bounds.end());
    auto view_pd = mkldnn::view::primitive_desc(input_pd, dims, offsets).dst_primitive_desc();

    mkldnn::memory::primitive_desc result_pd =
        mkldnn::memory::primitive_desc(result_desc, runtime::cpu::executor::global_cpu_engine);
    size_t result_index = m_primitive_deps[slice_index][1];
    build_memory_primitive(result_desc, result_index);

    // reorder primitive descriptor
    mkldnn::reorder::primitive_desc reorder_pd =
        mkldnn::reorder::primitive_desc(view_pd, result_pd);
    // reorder primitive
    m_mkldnn_primitives[slice_index] = new mkldnn::reorder(
        reorder_pd, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
int softmax_axis)
......@@ -1196,6 +1843,37 @@ size_t MKLDNNEmitter::build_softmax_forward(const mkldnn::memory::desc& input_de
return primitive_index;
}
mkldnn::softmax_forward::desc MKLDNNEmitter::get_softmax_forward_desc(const ngraph::Node* node)
{
    // MKLDNN softmax operates along exactly one axis; reject multi-axis ops.
    auto softmax_op = static_cast<const ngraph::op::Softmax*>(node);
    auto axes = softmax_op->get_axes();
    if (axes.size() != 1)
    {
        throw ngraph_error("MKLDNN supports softmax only across single axis");
    }
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::softmax_forward::desc(mkldnn::prop_kind::forward_scoring,
                                         src_desc,
                                         static_cast<int>(*(axes.begin())));
}
void MKLDNNEmitter::build_softmax_forward(const mkldnn::softmax_forward::desc& softmax_desc,
                                          size_t softmax_index)
{
    // Deps layout: [0] = input, [1] = result (same desc for both).
    const size_t src_index = m_primitive_deps[softmax_index][0];
    build_memory_primitive(softmax_desc.data.data_desc, src_index);
    const size_t dst_index = m_primitive_deps[softmax_index][1];
    build_memory_primitive(softmax_desc.data.data_desc, dst_index);

    mkldnn::softmax_forward::primitive_desc fwd_pd{softmax_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[softmax_index] = new mkldnn::softmax_forward(
        fwd_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha)
......@@ -1217,6 +1895,33 @@ size_t MKLDNNEmitter::build_leaky_relu(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_leaky_relu_desc(const ngraph::Node* node)
{
    // Leaky ReLU is eltwise_relu with the op's alpha as the negative slope.
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training,
                                         mkldnn::algorithm::eltwise_relu,
                                         src_desc,
                                         static_cast<const op::LeakyRelu*>(node)->get_alpha(),
                                         0.0f);
}
void MKLDNNEmitter::build_leaky_relu(const mkldnn::eltwise_forward::desc& leaky_relu_desc,
                                     size_t leaky_relu_index)
{
    // Deps layout: [0] = input, [1] = result (same desc for both).
    const size_t src_index = m_primitive_deps[leaky_relu_index][0];
    build_memory_primitive(leaky_relu_desc.data.data_desc, src_index);
    const size_t dst_index = m_primitive_deps[leaky_relu_index][1];
    build_memory_primitive(leaky_relu_desc.data.data_desc, dst_index);

    mkldnn::eltwise_forward::primitive_desc fwd_pd{leaky_relu_desc, executor::global_cpu_engine};
    m_mkldnn_primitives[leaky_relu_index] = new mkldnn::eltwise_forward(
        fwd_pd, *m_mkldnn_primitives[src_index], *m_mkldnn_primitives[dst_index]);
}
size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha)
......@@ -1238,6 +1943,33 @@ size_t MKLDNNEmitter::build_bounded_relu(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
mkldnn::eltwise_forward::desc MKLDNNEmitter::get_bounded_relu_desc(const ngraph::Node* node)
{
    // Bounded ReLU clamps activations at the op's alpha.
    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    return mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_training,
                                         mkldnn::algorithm::eltwise_bounded_relu,
                                         src_desc,
                                         static_cast<const op::BoundedRelu*>(node)->get_alpha(),
                                         0.0f);
}
// Instantiates the BoundedRelu primitive in the pre-reserved slot
// bounded_relu_index. Dependencies [0] and [1] are the input and result
// memory primitive indices; eltwise ops share one memory descriptor for both.
void MKLDNNEmitter::build_bounded_relu(const mkldnn::eltwise_forward::desc& bounded_relu_desc,
                                       size_t bounded_relu_index)
{
    size_t input_index = m_primitive_deps[bounded_relu_index][0];
    build_memory_primitive(bounded_relu_desc.data.data_desc, input_index);
    size_t result_index = m_primitive_deps[bounded_relu_index][1];
    build_memory_primitive(bounded_relu_desc.data.data_desc, result_index);
    // Primitive-descriptor is created inline from the op descriptor and the
    // global CPU engine.
    m_mkldnn_primitives[bounded_relu_index] =
        new mkldnn::eltwise_forward({bounded_relu_desc, executor::global_cpu_engine},
                                    *m_mkldnn_primitives[input_index],
                                    *m_mkldnn_primitives[result_index]);
}
size_t MKLDNNEmitter::convolution_forward_init(bool with_bias)
{
size_t size = m_mkldnn_primitives.size();
......@@ -1255,3 +1987,18 @@ size_t MKLDNNEmitter::convolution_forward_init(bool with_bias)
}
return m_mkldnn_primitives.size() - 1;
}
// Reserves `count` null slots in the primitive table for one op and records
// the dependency list for the op's own slot (the last reserved index).
//
// The first count-1 slots hold the op's memory primitives and are registered
// as dependencies of the final slot, which will hold the op primitive itself.
// If new_workspace is true a placeholder (0) is appended for a workspace index
// to be filled in later.
//
// @return index of the op's primitive slot (the last reserved one).
size_t MKLDNNEmitter::reserve_primitive_space(size_t count, bool new_workspace)
{
    const size_t size = m_mkldnn_primitives.size();
    m_mkldnn_primitives.resize(size + count, nullptr);

    const size_t op_index = m_mkldnn_primitives.size() - 1;
    auto& deps = m_primitive_deps[op_index];
    // Use `i + 1 < count` rather than `i < count - 1`: count is unsigned, so
    // `count - 1` would wrap around to SIZE_MAX when count == 0. This also
    // avoids the signed/unsigned comparison of the original int loop index.
    for (size_t i = 0; i + 1 < count; i++)
    {
        deps.push_back(size + i);
    }
    if (new_workspace)
    {
        deps.push_back(0);
    }
    return op_index;
}
......@@ -14,6 +14,10 @@
// limitations under the License.
//*****************************************************************************
// For direct execution, we reserve space for primitives, then create those primitives the
// first time the functor is called. This could be extended to recreate primitives whenever
// shapes change. Different ops need different numbers of primitives.
#pragma once
#include <memory>
......@@ -25,11 +29,18 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
......@@ -37,6 +48,9 @@
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/leaky_relu.hpp"
#include "ngraph/runtime/cpu/op/rnn_utils.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
......@@ -75,6 +89,9 @@ namespace ngraph
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
const std::vector<char*>& get_mkldnn_workspaces();
// Reserve space for the primitives of each op; different ops require different
// numbers of primitives. Some ops also require a new workspace.
size_t reserve_primitive_space(size_t count, bool new_workspace = false);
size_t insert_primitive(mkldnn::primitive* primitive);
size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace);
const std::vector<size_t>& get_primitive_deps(size_t index) const;
......@@ -90,6 +107,7 @@ namespace ngraph
const mkldnn::memory::dims& strides,
mkldnn::memory::data_type dtype) const;
size_t build_memory_primitive(const mkldnn::memory::desc& desc);
void build_memory_primitive(const mkldnn::memory::desc& desc, size_t index);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
......@@ -173,9 +191,13 @@ namespace ngraph
// MKLDNN relies on named formats for kernel selection
if (weights_desc.data.format == mkldnn_nchw)
{
weights_desc.data.format = mkldnn_oihw;
}
if (weights_desc.data.format == mkldnn_ncdhw)
{
weights_desc.data.format = mkldnn_oidhw;
}
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
......@@ -284,6 +306,11 @@ namespace ngraph
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
void build_convolution_backward_weights(
const mkldnn::convolution_backward_weights::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
size_t build_convolution_backward_data(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc,
......@@ -291,6 +318,12 @@ namespace ngraph
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
void build_convolution_backward_data(
const mkldnn::convolution_backward_data::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
/**
* Convolution + bias backprop for weights and bias
*/
......@@ -304,6 +337,11 @@ namespace ngraph
const ngraph::CoordinateDiff& ng_padding_below,
const ngraph::CoordinateDiff& ng_padding_above);
void build_convolution_backward_weights_bias(
const mkldnn::convolution_backward_weights::desc& bwd_desc,
const mkldnn::convolution_forward::desc& fwd_desc,
size_t conv_index);
template <typename OP>
size_t build_convolution_backward(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
......@@ -326,9 +364,13 @@ namespace ngraph
{
// MKLDNN relies on named formats for kernel selection
if (arg0_desc.data.format == mkldnn_nchw)
{
arg0_desc.data.format = mkldnn_oihw;
}
if (arg0_desc.data.format == mkldnn_ncdhw)
{
arg0_desc.data.format = mkldnn_oidhw;
}
return build_convolution_backward_data(
arg0_desc,
......@@ -375,6 +417,102 @@ namespace ngraph
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
// Builds the MKLDNN forward descriptor for an average-pooling op.
//
// @param node      AvgPool-like node of static type OP.
// @param training  selects prop_kind::forward_training vs forward_inference.
//                  NOTE(review): in the training branch the node's output md is
//                  passed in the src position and the input md in the dst
//                  position (swapped relative to the inference branch) —
//                  presumably this path builds the forward hint for the
//                  backprop op, whose input is the delta; confirm against
//                  callers.
template <typename OP>
mkldnn::pooling_forward::desc get_avg_pooling_forward_desc(const ngraph::Node* node,
                                                           bool training)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();
    // Whether padded elements count toward the averaging divisor.
    auto include_padding_in_avg_computation =
        pool->get_include_padding_in_avg_computation();

    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    if (training)
    {
        return mkldnn::pooling_forward::desc(
            mkldnn::prop_kind::forward_training,
            (include_padding_in_avg_computation
                 ? mkldnn::algorithm::pooling_avg_include_padding
                 : mkldnn::algorithm::pooling_avg_exclude_padding),
            result_desc,
            input_desc,
            mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
            mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
            mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
            mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
            mkldnn::padding_kind::zero);
    }
    else
    {
        return mkldnn::pooling_forward::desc(
            mkldnn::prop_kind::forward_inference,
            (include_padding_in_avg_computation
                 ? mkldnn::algorithm::pooling_avg_include_padding
                 : mkldnn::algorithm::pooling_avg_exclude_padding),
            input_desc,
            result_desc,
            mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
            mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
            mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
            mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
            mkldnn::padding_kind::zero);
    }
}
// Builds the MKLDNN forward descriptor for a max-pooling op.
//
// @param node      MaxPool-like node of static type OP.
// @param training  when true, the descriptor is built from the backprop node's
//                  tensors: input 1 is treated as the delta (diff_dst) and
//                  output 0 as the propagated delta (diff_src) — presumably
//                  this serves as the forward hint for the backward primitive;
//                  confirm against callers. When false, input 0 / output 0 are
//                  the plain src / dst.
template <typename OP>
mkldnn::pooling_forward::desc get_max_pooling_forward_desc(const ngraph::Node* node,
                                                           bool training)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    if (training)
    {
        auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
        auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

        return mkldnn::pooling_forward::desc(
            mkldnn::prop_kind::forward_training,
            mkldnn::algorithm::pooling_max,
            diff_src_desc,
            diff_dst_desc,
            mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
            mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
            mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
            mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
            mkldnn::padding_kind::zero);
    }
    else
    {
        auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
        auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

        return mkldnn::pooling_forward::desc(
            mkldnn::prop_kind::forward_inference,
            mkldnn::algorithm::pooling_max,
            input_desc,
            result_desc,
            mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
            mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
            mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
            mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
            mkldnn::padding_kind::zero);
    }
}
void build_pooling_forward(const mkldnn::pooling_forward::desc& pool_desc,
size_t pool_index);
size_t build_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc,
const mkldnn::memory::desc& diff_src_desc,
......@@ -383,6 +521,39 @@ namespace ngraph
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
// Builds the MKLDNN backward descriptor for an average-pooling backprop op.
// Input 0 is used as the incoming delta (diff_dst) and output 0 as the
// propagated delta (diff_src).
template <typename OP>
mkldnn::pooling_backward::desc
    get_avg_pooling_backward_desc(const ngraph::Node* node)
{
    const OP* avg_pool = static_cast<const OP*>(node);

    // Whether padded elements count toward the averaging divisor.
    const auto algo = avg_pool->get_include_padding_in_avg_computation()
                          ? mkldnn::algorithm::pooling_avg_include_padding
                          : mkldnn::algorithm::pooling_avg_exclude_padding;

    auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    const auto& strides = avg_pool->get_window_movement_strides();
    const auto& window = avg_pool->get_window_shape();
    const auto& pad_below = avg_pool->get_padding_below();
    const auto& pad_above = avg_pool->get_padding_above();

    return mkldnn::pooling_backward::desc(
        algo,
        diff_src_desc,
        diff_dst_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(window.begin(), window.end()),
        mkldnn::memory::dims(pad_below.begin(), pad_below.end()),
        mkldnn::memory::dims(pad_above.begin(), pad_above.end()),
        mkldnn::padding_kind::zero);
}
void build_pooling_backward(const mkldnn::pooling_backward::desc& pool_desc,
const mkldnn::pooling_forward::desc& pool_fwd_desc,
size_t pool_index);
size_t build_max_pooling_with_indices_forward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& src_desc,
const mkldnn::memory::desc& dst_desc,
......@@ -391,6 +562,35 @@ namespace ngraph
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
// Builds the forward descriptor for max pooling "with indices".
// prop_kind::forward_training is used (rather than inference) — presumably so
// the primitive produces the index workspace the backward pass consumes.
template <typename OP>
mkldnn::pooling_forward::desc
    get_max_pooling_with_indices_forward_desc(const ngraph::Node* node)
{
    const OP* max_pool = static_cast<const OP*>(node);

    auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto dst_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    const auto& strides = max_pool->get_window_movement_strides();
    const auto& window = max_pool->get_window_shape();
    const auto& pad_below = max_pool->get_padding_below();
    const auto& pad_above = max_pool->get_padding_above();

    return mkldnn::pooling_forward::desc(
        mkldnn::prop_kind::forward_training,
        mkldnn::algorithm::pooling_max,
        src_desc,
        dst_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(window.begin(), window.end()),
        mkldnn::memory::dims(pad_below.begin(), pad_below.end()),
        mkldnn::memory::dims(pad_above.begin(), pad_above.end()),
        mkldnn::padding_kind::zero);
}
void build_max_pooling_with_indices_forward(
const mkldnn::pooling_forward::desc& max_pool_desc, size_t max_pool_index);
size_t build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& fprop_src_desc,
const mkldnn::memory::desc& diff_dst_desc,
......@@ -400,6 +600,37 @@ namespace ngraph
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
// Builds the MKLDNN backward descriptor for a max-pooling backprop op.
// Input 1 carries the incoming delta (diff_dst); output 0 is the propagated
// delta (diff_src). Input 0 (the forward-pass source) is not needed for the
// descriptor itself.
template <typename OP>
mkldnn::pooling_backward::desc
    get_max_pooling_backward_desc(const ngraph::Node* node)
{
    auto pool = static_cast<const OP*>(node);

    auto window_shape = pool->get_window_shape();
    auto window_strides = pool->get_window_movement_strides();
    auto padding_below = pool->get_padding_below();
    auto padding_above = pool->get_padding_above();

    auto diff_dst_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    auto diff_src_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    return mkldnn::pooling_backward::desc(
        mkldnn::algorithm::pooling_max,
        diff_src_desc,
        diff_dst_desc,
        mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
        mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
}
void build_max_pooling_backward(const mkldnn::pooling_backward::desc& bwd_pool_desc,
const mkldnn::pooling_forward::desc& fwd_pool_desc,
const mkldnn::memory::desc& fprop_src_desc,
size_t fwd_pool_index,
size_t bwd_pool_index);
size_t build_max_pooling_with_indices_backward(
mkldnn::algorithm pooling_algorithm,
const mkldnn::memory::desc& diff_dst_desc,
......@@ -409,9 +640,18 @@ namespace ngraph
const ngraph::Shape& padding_below,
const ngraph::Shape& padding_above);
void build_max_pooling_with_indices_backward(
const mkldnn::pooling_backward::desc& bwd_pool_desc,
const mkldnn::pooling_forward::desc& fwd_pool_desc,
size_t max_pool_index);
size_t build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
void build_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
size_t reorder_index);
size_t build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
......@@ -419,20 +659,47 @@ namespace ngraph
float bias,
int nsize);
mkldnn::lrn_forward::desc get_lrn_forward_desc(const ngraph::Node* node);
void build_lrn_forward(const mkldnn::lrn_forward::desc& lrn_desc, size_t lrn_index);
size_t build_relu_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_forward::desc get_relu_forward_desc(const ngraph::Node* node);
void build_relu_forward(const mkldnn::eltwise_forward::desc& relu_desc,
size_t relu_index);
size_t build_relu_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_backward::desc get_relu_backward_desc(const ngraph::Node* node);
void build_relu_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
const mkldnn::eltwise_forward::desc& fwd_desc,
size_t relu_index);
size_t build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_forward::desc get_sigmoid_forward_desc(const ngraph::Node* node,
bool backward_op);
void build_sigmoid_forward(const mkldnn::eltwise_forward::desc& sigmoid_desc,
size_t sigmoid_index);
size_t build_sigmoid_backward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& delta_desc,
const mkldnn::memory::desc& result_desc);
mkldnn::eltwise_backward::desc get_sigmoid_backward_desc(const ngraph::Node* node);
void build_sigmoid_backward(const mkldnn::eltwise_backward::desc& bwd_desc,
const mkldnn::eltwise_forward::desc& fwd_desc,
size_t sigmoid_index);
size_t build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc,
......@@ -440,6 +707,11 @@ namespace ngraph
const std::vector<float>& scale_vector,
const std::vector<mkldnn::memory::primitive_desc>& input_pd);
mkldnn::sum::primitive_desc get_elementwise_add_desc(const ngraph::Node* node);
void build_elementwise_add(const mkldnn::sum::primitive_desc& sum_pd,
size_t add_index);
size_t build_batchnorm_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
......@@ -450,6 +722,41 @@ namespace ngraph
bool bn_training_flag,
const mkldnn::post_ops& pops = mkldnn::post_ops());
// Builds the batch-normalization forward descriptor for a BatchNorm node.
//
// @param node                 BatchNorm-like node of static type OP.
// @param training_with_3args  true selects the training form that uses only
//                             scale/shift; false additionally sets
//                             use_global_stats so precomputed mean/variance
//                             inputs are consumed.
template <typename OP>
mkldnn::batch_normalization_forward::desc
    get_batchnorm_forward_desc(const ngraph::Node* node, bool training_with_3args)
{
    const OP* batchnorm = static_cast<const OP*>(node);
    auto eps = batchnorm->get_eps_value();

    // Input 2 supplies the data-tensor layout in both forms.
    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);

    if (training_with_3args)
    {
        return mkldnn::batch_normalization_forward::desc(
            mkldnn::prop_kind::forward_training,
            input_desc,
            eps,
            mkldnn::batch_normalization_flag::use_scale_shift);
    }
    return mkldnn::batch_normalization_forward::desc(
        mkldnn::prop_kind::forward_training,
        input_desc,
        eps,
        mkldnn::batch_normalization_flag::use_scale_shift |
            mkldnn::batch_normalization_flag::use_global_stats);
}
void build_batchnorm_forward(
const mkldnn::batch_normalization_forward::desc& batchnorm_desc,
const mkldnn::memory::desc& weights_desc,
bool bn_training_flag,
size_t batchnorm_index,
const mkldnn::post_ops& pops = mkldnn::post_ops());
size_t build_batchnorm_backward(const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& mean_desc,
......@@ -459,6 +766,15 @@ namespace ngraph
const mkldnn::memory::desc& dweights_desc,
const double eps);
mkldnn::batch_normalization_backward::desc
get_batchnorm_backward_desc(const ngraph::Node* node);
void build_batchnorm_backward(
const mkldnn::batch_normalization_backward::desc& batchnorm_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& dweights_desc,
size_t batchnorm_index);
template <typename OP>
size_t build_rnn(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
......@@ -571,27 +887,57 @@ namespace ngraph
const mkldnn::rnn_direction& rnn_direction,
const mkldnn::algorithm& rnn_algorithm);
void build_rnn_forward(const mkldnn::rnn_forward::desc& desc, size_t rnn_idx);
size_t build_concat(const std::vector<mkldnn::memory::desc>& inputs_data_desc,
const mkldnn::memory::desc& result_desc,
const size_t concat_dim);
mkldnn::concat::primitive_desc get_concat_desc(const ngraph::Node* node,
size_t nargs);
void build_concat(const mkldnn::concat::primitive_desc& concat_pd,
const std::vector<mkldnn::memory::desc>& inputs_data_desc,
size_t concat_index);
size_t build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds,
const ngraph::Shape& result_shape);
void build_slice(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Coordinate& lower_bounds,
const ngraph::Shape& result_shape,
size_t slice_index);
size_t build_softmax_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
int softmax_axis);
mkldnn::softmax_forward::desc get_softmax_forward_desc(const ngraph::Node* node);
void build_softmax_forward(const mkldnn::softmax_forward::desc& sigmoid_desc,
size_t softmax_index);
size_t build_leaky_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha);
mkldnn::eltwise_forward::desc get_leaky_relu_desc(const ngraph::Node* node);
void build_leaky_relu(const mkldnn::eltwise_forward::desc& leaky_relu_desc,
size_t leaky_relu_index);
size_t build_bounded_relu(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha);
mkldnn::eltwise_forward::desc get_bounded_relu_desc(const ngraph::Node* node);
void build_bounded_relu(const mkldnn::eltwise_forward::desc& bounded_relu_desc,
size_t bounded_relu_index);
size_t build_quantized_max_pool(const ngraph::Node* node);
size_t build_quantized_avg_pool(const ngraph::Node* node);
......@@ -603,23 +949,33 @@ namespace ngraph
size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales);
void build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales,
size_t quantize_index,
const int mask = 0);
// Returns the argument index of the scale input for each quantized
// convolution variant (the scale comes after data/weights and any
// bias/add inputs).
//
// NOTE(review): SOURCE here is diff residue with old `return N;` lines
// interleaved; this is the reconstructed post-change version, which asserts
// instead of silently returning 0 for an unsupported OP.
template <typename OP>
size_t get_scale_index()
{
    size_t index = 0;
    if (std::is_same<OP, ngraph::op::QuantizedConvolution>() ||
        std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
    {
        index = 2;
    }
    else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
    {
        index = 3;
    }
    else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
             std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
    {
        index = 4;
    }
    NGRAPH_ASSERT(index != 0);
    return index;
}
template <typename OP, typename T>
......@@ -640,7 +996,8 @@ namespace ngraph
template <typename OP,
typename std::enable_if<
(std::is_same<OP, ngraph::op::Convolution>::value ||
std::is_same<OP, ngraph::op::QuantizedConvolution>::value),
std::is_same<OP, ngraph::op::QuantizedConvolution>::value ||
std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
......@@ -650,7 +1007,8 @@ namespace ngraph
template <typename OP,
typename std::enable_if<
(!std::is_same<OP, ngraph::op::Convolution>::value &&
!std::is_same<OP, ngraph::op::QuantizedConvolution>::value),
!std::is_same<OP, ngraph::op::QuantizedConvolution>::value &&
!std::is_same<OP, ngraph::op::GroupConvolution>::value),
std::nullptr_t>::type = nullptr>
bool has_relu(const ngraph::Node* node)
{
......@@ -662,9 +1020,11 @@ namespace ngraph
{
if (std::is_same<OP, ngraph::op::ConvolutionBias>() ||
std::is_same<OP, ngraph::op::ConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::ConvolutionBiasBackpropFiltersBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>() ||
std::is_same<OP, ngraph::op::GroupConvolutionBias>())
{
return true;
}
......@@ -692,10 +1052,113 @@ namespace ngraph
}
// Builds the MKLDNN rnn_forward descriptor for an RNN/LSTM node from the
// node's hyperparameters and the argument/result tensor layouts.
//
// NOTE(review): SOURCE contained stale diff-residue lines declaring the old
// `get_convolution_forward_desc` signature above this definition; they are
// dropped here.
//
// @param node  RNN-like node of static type OP.
// @param args  inputs: [0] src_layer, [1] src_iter, [2] weights_layer,
//              [3] weights_iter, [4] bias.
// @param out   outputs: [0] dst_layer, [1] dst_iter.
// @throws ngraph_error on an unsupported cell type or direction, or when the
//         output feature sizes are inconsistent with the node's attributes.
template <typename OP>
mkldnn::rnn_forward::desc
    get_rnn_forward_desc(const ngraph::Node* node,
                         const std::vector<TensorViewWrapper>& args,
                         const std::vector<TensorViewWrapper>& out)
{
    auto rnn_node = static_cast<const OP*>(node);
    auto src_sequence_length_max =
        static_cast<unsigned long>(rnn_node->get_src_sequence_length());
    auto direction = static_cast<unsigned long>(rnn_node->get_direction());
    auto num_fused_layers =
        static_cast<unsigned long>(rnn_node->get_num_fused_layers());
    auto feature_size =
        static_cast<unsigned long>(rnn_node->get_src_iter_feature_size());
    auto batch = static_cast<unsigned long>(rnn_node->get_batch_size());
    auto rnn_cell_n_gates =
        static_cast<unsigned long>(rnn_node->get_gates_per_cell());
    auto rnn_cell_n_states =
        static_cast<unsigned long>(rnn_node->get_num_cell_states());

    // Map the nGraph cell type onto the MKLDNN algorithm enum.
    auto get_mkldnn_rnn_cell_type = [&]() {
        switch (rnn_node->get_rnn_type())
        {
        case rnn_utils::rnntype::vanilla_rnn: return mkldnn::algorithm::vanilla_rnn;
        case rnn_utils::rnntype::vanilla_gru: return mkldnn::algorithm::vanilla_gru;
        case rnn_utils::rnntype::vanilla_lstm:
            return mkldnn::algorithm::vanilla_lstm;
        default: throw ngraph_error("unsupported mkldnn rnn algorithm");
        }
    };

    // direction == 1 -> unidirectional, == 2 -> bidirectional (concat output).
    auto get_mkldnn_rnn_direction = [&]() {
        switch (direction)
        {
        case 1: return mkldnn::rnn_direction::unidirectional_left2right;
        case 2: return mkldnn::rnn_direction::bidirectional_concat;
        default: throw ngraph_error("unsupported mkldnn rnn direction");
        }
    };

    if (out[0].get_shape().size() == 2 &&
        (out[0].get_shape()[1] != direction * feature_size))
    {
        throw ngraph_error(
            "input slc{ht} feature size is not equal to output dlc{ht} feature "
            "size ");
    }

    if (out[1].get_shape().size() == 2 && (out[1].get_shape()[1] != feature_size) &&
        rnn_node->get_num_timesteps() != 1)
    {
        throw ngraph_error(
            "input sic{ht_1|ct_1} feature size is not equal to output "
            "dlc{ht_1|ct_1} "
            "feature size ");
    }

    // Logical tensor shapes in MKLDNN's expected orderings.
    Shape src_layer_tz{
        src_sequence_length_max,
        batch,
        static_cast<unsigned long>(rnn_node->get_src_layer_feature_size())};
    Shape src_iter_tz{
        num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};
    Shape wei_layer_tz{
        num_fused_layers,
        direction,
        static_cast<unsigned long>(rnn_node->get_src_layer_feature_size()),
        rnn_cell_n_gates,
        feature_size};
    Shape wei_iter_tz{
        num_fused_layers, direction, feature_size, rnn_cell_n_gates, feature_size};
    Shape bias_tz{num_fused_layers, direction, rnn_cell_n_gates, feature_size};
    Shape dst_layer_tz{src_sequence_length_max, batch, direction * feature_size};
    Shape dst_iter_tz{
        num_fused_layers, direction, rnn_cell_n_states, batch, feature_size};

    // We create the memory descriptors used by the user
    auto src_layer_desc = build_memory_descriptor(
        src_layer_tz, args[0].get_element_type(), mkldnn::memory::format::tnc);
    auto src_iter_desc = build_memory_descriptor(
        src_iter_tz, args[1].get_element_type(), mkldnn::memory::format::ldsnc);
    auto weights_layer_desc = build_memory_descriptor(
        wei_layer_tz, args[2].get_element_type(), mkldnn::memory::format::ldigo);
    auto weights_iter_desc = build_memory_descriptor(
        wei_iter_tz, args[3].get_element_type(), mkldnn::memory::format::ldigo);
    auto bias_desc = build_memory_descriptor(
        bias_tz, args[4].get_element_type(), mkldnn::memory::format::ldgo);
    auto dst_layer_desc = build_memory_descriptor(
        dst_layer_tz, out[0].get_element_type(), mkldnn::memory::format::tnc);
    auto dst_iter_desc = build_memory_descriptor(
        dst_iter_tz, out[1].get_element_type(), mkldnn::memory::format::ldsnc);

    mkldnn::rnn_cell::desc rnn_cell_desc(get_mkldnn_rnn_cell_type());
    return mkldnn::rnn_forward::desc(mkldnn::prop_kind::forward_training,
                                     rnn_cell_desc,
                                     get_mkldnn_rnn_direction(),
                                     src_layer_desc,
                                     src_iter_desc,
                                     weights_layer_desc,
                                     weights_iter_desc,
                                     bias_desc,
                                     dst_layer_desc,
                                     dst_iter_desc);
}
template <typename OP>
mkldnn::convolution_forward::desc
get_convolution_forward_desc(const ngraph::Node* node)
{
auto convolution = static_cast<const OP*>(node);
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
......@@ -789,10 +1252,10 @@ namespace ngraph
size_t convolution_forward_init(bool with_bias = false);
template <bool with_bias>
void convolution_forward(const mkldnn::convolution_forward::desc& desc,
void build_convolution_forward(const mkldnn::convolution_forward::desc& desc,
const mkldnn::primitive_attr& attr,
const mkldnn::engine& engine,
size_t& conv_idx)
size_t conv_idx)
{
size_t input_idx, weights_idx, results_idx, bias_idx;
input_idx = m_primitive_deps[conv_idx][0];
......@@ -835,6 +1298,174 @@ namespace ngraph
m_mkldnn_primitives[conv_idx] = prim;
}
// Builds the convolution backward-data descriptor (gradient w.r.t. the input)
// for a ConvolutionBackpropData-like node.
// Inputs: [0] weights, [1] delta (diff_dst); output: [0] diff_src.
template <typename OP>
mkldnn::convolution_backward_data::desc
    get_convolution_backward_data_desc(const ngraph::Node* node)
{
    auto convolution = static_cast<const OP*>(node);
    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
    Strides window_dilation_strides_adjusted;
    for (size_t s : convolution->get_window_dilation_strides_forward())
    {
        window_dilation_strides_adjusted.push_back(s - 1);
    }

    auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    // MKLDNN relies on named formats for kernel selection; rename generic
    // activation formats to the corresponding weights formats.
    if (weights_desc.data.format == mkldnn_nchw)
    {
        weights_desc.data.format = mkldnn_oihw;
    }
    if (weights_desc.data.format == mkldnn_ncdhw)
    {
        weights_desc.data.format = mkldnn_oidhw;
    }

    auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    return mkldnn::convolution_backward_data::desc(
        mkldnn::algorithm::convolution_direct,
        result_desc,
        weights_desc,
        delta_desc,
        MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
        MKLDNN_DIMS(window_dilation_strides_adjusted),
        MKLDNN_DIMS(convolution->get_padding_below_forward()),
        MKLDNN_DIMS(convolution->get_padding_above_forward()),
        mkldnn::padding_kind::zero);
}
// Builds the convolution backward-weights descriptor (gradient w.r.t. the
// filters, and optionally the bias) for a ConvolutionBackpropFilters-like node.
// Inputs: [0] forward-pass data, [1] delta (diff_dst);
// outputs: [0] diff_weights, and when has_bias<OP>() holds, [1] diff_bias.
template <typename OP>
mkldnn::convolution_backward_weights::desc
    get_convolution_backward_weights_desc(const ngraph::Node* node)
{
    auto convolution = static_cast<const OP*>(node);
    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
    Strides window_dilation_strides_adjusted;
    for (size_t s : convolution->get_window_dilation_strides_forward())
    {
        window_dilation_strides_adjusted.push_back(s - 1);
    }

    auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
    auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
    auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

    if (has_bias<OP>())
    {
        // The bias-gradient output requires the extended desc overload.
        auto out_bias_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);

        return mkldnn::convolution_backward_weights::desc(
            mkldnn::algorithm::convolution_direct,
            in_data_desc,
            out_weights_delta_desc,
            out_bias_delta_desc,
            in_delta_desc,
            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
            MKLDNN_DIMS(window_dilation_strides_adjusted),
            MKLDNN_DIMS(convolution->get_padding_below_forward()),
            MKLDNN_DIMS(convolution->get_padding_above_forward()),
            mkldnn::padding_kind::zero);
    }
    else
    {
        return mkldnn::convolution_backward_weights::desc(
            mkldnn::algorithm::convolution_direct,
            in_data_desc,
            out_weights_delta_desc,
            in_delta_desc,
            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
            MKLDNN_DIMS(window_dilation_strides_adjusted),
            MKLDNN_DIMS(convolution->get_padding_below_forward()),
            MKLDNN_DIMS(convolution->get_padding_above_forward()),
            mkldnn::padding_kind::zero);
    }
}
// Builds the convolution *forward* descriptor that MKLDNN requires as a hint
// when creating backward primitive-descriptors, for each backward-op flavor:
//   - ConvolutionBackpropData:    built from weights / delta / result mds;
//   - ConvolutionBackpropFilters: built from data / weights-delta / delta mds;
//   - otherwise (presumably the backprop-filters-with-bias variant — confirm
//     against callers): additionally includes the bias-delta md.
template <typename OP>
mkldnn::convolution_forward::desc
    get_convolution_forward_desc_for_backward_op(const ngraph::Node* node)
{
    auto convolution = static_cast<const OP*>(node);
    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
    Strides window_dilation_strides_adjusted;
    for (size_t s : convolution->get_window_dilation_strides_forward())
    {
        window_dilation_strides_adjusted.push_back(s - 1);
    }

    if (std::is_same<OP, ngraph::op::ConvolutionBackpropData>())
    {
        auto weights_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);

        // MKLDNN relies on named formats for kernel selection
        if (weights_desc.data.format == mkldnn_nchw)
        {
            weights_desc.data.format = mkldnn_oihw;
        }
        if (weights_desc.data.format == mkldnn_ncdhw)
        {
            weights_desc.data.format = mkldnn_oidhw;
        }

        auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
        auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

        return mkldnn::convolution_forward::desc(
            mkldnn::prop_kind::forward,
            mkldnn::algorithm::convolution_direct,
            result_desc,
            weights_desc,
            delta_desc,
            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
            MKLDNN_DIMS(window_dilation_strides_adjusted),
            MKLDNN_DIMS(convolution->get_padding_below_forward()),
            MKLDNN_DIMS(convolution->get_padding_above_forward()),
            mkldnn::padding_kind::zero);
    }
    else if (std::is_same<OP, ngraph::op::ConvolutionBackpropFilters>())
    {
        auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
        auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
        auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

        return mkldnn::convolution_forward::desc(
            mkldnn::prop_kind::forward,
            mkldnn::algorithm::convolution_direct,
            in_data_desc,
            out_weights_delta_desc,
            in_delta_desc,
            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
            MKLDNN_DIMS(window_dilation_strides_adjusted),
            MKLDNN_DIMS(convolution->get_padding_below_forward()),
            MKLDNN_DIMS(convolution->get_padding_above_forward()),
            mkldnn::padding_kind::zero);
    }
    else
    {
        auto in_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
        auto in_delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1);
        auto out_weights_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
        auto out_bias_delta_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);

        return mkldnn::convolution_forward::desc(
            mkldnn::prop_kind::forward,
            mkldnn::algorithm::convolution_direct,
            in_data_desc,
            out_weights_delta_desc,
            out_bias_delta_desc,
            in_delta_desc,
            MKLDNN_DIMS(convolution->get_window_movement_strides_forward()),
            MKLDNN_DIMS(window_dilation_strides_adjusted),
            MKLDNN_DIMS(convolution->get_padding_below_forward()),
            MKLDNN_DIMS(convolution->get_padding_above_forward()),
            mkldnn::padding_kind::zero);
    }
}
private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::stream> m_mkldnn_streams;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment