Commit eda11da7 authored by Amy Zhuang, committed by Adam Procter

Refactor to create MKLDNN primitives on the first iteration: (#2363)

* Refactor to create MKLDNN primitives on the first iteration:
  add, avg_pool, batch_norm, bounded_relu, concat, convert_layout,
  leaky_relu, lrn, max_pool, quantized_avg_pool, quantized_max_pool,
  relu, sigmoid, slice, softmax.

* Refactor to create MKLDNN primitives on the first iteration:
  pooling backward, convolution.

* Refactor to create MKLDNN primitives on the first iteration:
  convolution backward, rnn, lstm, quantization, dequantization.

* Delete one duplicate declaration.

* Create and pass mkldnn descriptors/primitive-descriptors for ops.

* Create and pass mkldnn descriptors for convolution backward ops.

* Remove one unused variable.

* Remove unused variables.

* Remove unused variables.

* Address PR feedback.

* Fix a bug.

* Add one parameter to build_quantize_reorder.

* Address PR feedback.

* Fix bi-rnn issue.
parent c571b7a7
...@@ -38,28 +38,22 @@ namespace ngraph ...@@ -38,28 +38,22 @@ namespace ngraph
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
vector<float> scale_vector(2, 1);
vector<mkldnn::memory::primitive_desc> inputs_pd;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input0_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto sum_pd = mkldnn_emitter->get_elementwise_add_desc(node);
auto input1_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); // Add needs 4 primitives: input0, input1, result, and sum.
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); size_t add_index = mkldnn_emitter->reserve_primitive_space(4);
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input0_data_desc, runtime::cpu::executor::global_cpu_engine));
inputs_pd.push_back(mkldnn::memory::primitive_desc(
input1_data_desc, runtime::cpu::executor::global_cpu_engine));
size_t add_index = mkldnn_emitter->build_elementwise_add(
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index); auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto functor = [&, add_index](CPURuntimeContext* ctx, auto functor = [&, sum_pd, add_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_elementwise_add(sum_pd, add_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -52,24 +52,19 @@ namespace ngraph ...@@ -52,24 +52,19 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto avg_pool_desc =
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPool>(node,
false);
size_t avg_pool_index = mkldnn_emitter->build_pooling_forward( // AvgPool needs 3 primitives: input, result, and pooling_forward.
(include_padding_in_avg_computation size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
window_movement_strides,
window_shape,
padding_below,
padding_above);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx, auto functor = [&, avg_pool_desc, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(avg_pool_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
...@@ -130,23 +125,23 @@ namespace ngraph ...@@ -130,23 +125,23 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto diff_dst_desc = runtime::cpu::mkldnn_utils::get_input_mkldnn_md(node, 0); auto avg_pool_fwd_desc =
auto diff_src_desc = runtime::cpu::mkldnn_utils::get_output_mkldnn_md(node, 0); mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPoolBackprop>(
node, true);
size_t avg_pool_index = mkldnn_emitter->build_pooling_backward( auto avg_pool_desc =
(apb->get_include_padding_in_avg_computation() mkldnn_emitter->get_avg_pooling_backward_desc<ngraph::op::AvgPoolBackprop>(
? mkldnn::algorithm::pooling_avg_include_padding node);
: mkldnn::algorithm::pooling_avg_exclude_padding), // AvgPoolBackprop needs 3 primitives: input, result, and pooling_backward.
diff_dst_desc, size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
diff_src_desc,
apb->get_window_movement_strides(),
apb->get_window_shape(),
apb->get_padding_below(),
apb->get_padding_above());
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
auto functor = [&, avg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, avg_pool_desc, avg_pool_fwd_desc, avg_pool_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_backward(
avg_pool_desc, avg_pool_fwd_desc, avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
......
...@@ -48,8 +48,6 @@ namespace ngraph ...@@ -48,8 +48,6 @@ namespace ngraph
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name()); auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
const OP* batchnorm = static_cast<const OP*>(node);
// Kill clang diagnostics bug // Kill clang diagnostics bug
#pragma clang diagnostic push #pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-braces" #pragma clang diagnostic ignored "-Wmissing-braces"
...@@ -80,28 +78,32 @@ namespace ngraph ...@@ -80,28 +78,32 @@ namespace ngraph
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name()); auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, true);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto mean_desc = mkldnn_utils::get_output_mkldnn_md(node, 1);
auto variance_desc = mkldnn_utils::get_output_mkldnn_md(node, 2);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
false,
training,
ops);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { auto functor = [&,
batchnorm_desc,
weights_desc,
training,
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy( memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
...@@ -122,29 +124,32 @@ namespace ngraph ...@@ -122,29 +124,32 @@ namespace ngraph
auto& arg4_tensor = external_function->get_tensor_data(args[4].get_name()); auto& arg4_tensor = external_function->get_tensor_data(args[4].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, false);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto results_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto batchnorm_index =
mkldnn_emitter->build_batchnorm_forward(input_desc,
weights_desc,
results_desc,
mean_desc,
variance_desc,
batchnorm->get_eps_value(),
true,
training,
ops);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(6);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, batchnorm_index, stacked_weights, weight_sizes]( auto functor = [&,
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { batchnorm_desc,
weights_desc,
training,
ops,
batchnorm_index,
stacked_weights,
weight_sizes](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(
batchnorm_desc, weights_desc, training, batchnorm_index, ops);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy( memcpy(
stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
...@@ -295,9 +300,6 @@ namespace ngraph ...@@ -295,9 +300,6 @@ namespace ngraph
template <> template <>
void Builder::BUILDER_DECL(ngraph::op::BatchNormTrainingBackprop) void Builder::BUILDER_DECL(ngraph::op::BatchNormTrainingBackprop)
{ {
const ngraph::op::BatchNormTrainingBackprop* batchnorm =
static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
...@@ -326,34 +328,31 @@ namespace ngraph ...@@ -326,34 +328,31 @@ namespace ngraph
std::default_delete<uint8_t[]>()); std::default_delete<uint8_t[]>());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc = mkldnn_emitter->get_batchnorm_backward_desc(node);
auto weights_shape = Shape{2, args[0].get_size()}; auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor( auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
auto mean_desc = mkldnn_utils::get_input_mkldnn_md(node, 3);
auto variance_desc = mkldnn_utils::get_input_mkldnn_md(node, 4);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 5);
auto dinput_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto dweights_desc = mkldnn_emitter->build_memory_descriptor( auto dweights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc); weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
auto batchnorm_index = // batchnorm backward needs 8 primitives: weights, input, mean, variance,
mkldnn_emitter->build_batchnorm_backward(weights_desc, // delta, dinput, dweights, and batch_normalization_backward.
input_desc, auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(8);
mean_desc,
variance_desc,
delta_desc,
dinput_desc,
dweights_desc,
batchnorm->get_eps_value());
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index); auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
auto functor = [&, auto functor = [&,
batchnorm_desc,
weights_desc,
dweights_desc,
batchnorm_index, batchnorm_index,
stacked_weights, stacked_weights,
stacked_dweights, stacked_dweights,
weight_sizes](CPURuntimeContext* ctx, CPUExecutionContext* ectx) { weight_sizes](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_backward(
batchnorm_desc, weights_desc, dweights_desc, batchnorm_index);
}
memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]); memcpy(stacked_weights.get(), arg0_tensor, weight_sizes[0]);
memcpy(stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]); memcpy(stacked_weights.get() + weight_sizes[0], arg1_tensor, weight_sizes[1]);
......
...@@ -43,13 +43,18 @@ namespace ngraph ...@@ -43,13 +43,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bounded_relu_desc = mkldnn_emitter->get_bounded_relu_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // BoundedRelu needs 3 primitives: input, result, and eltwise_forward.
auto bounded_relu_index = auto bounded_relu_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_bounded_relu(input_desc, result_desc, alpha);
auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index);
auto functor = [&, bounded_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bounded_relu_desc, bounded_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_bounded_relu(bounded_relu_desc,
bounded_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index);
......
...@@ -92,29 +92,31 @@ namespace ngraph ...@@ -92,29 +92,31 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto concat_pd = mkldnn_emitter->get_concat_desc(node, nargs);
std::vector<mkldnn::memory::desc> inputs_data_desc; std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < args.size(); i++) for (size_t i = 0; i < nargs; i++)
{ {
inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i)); inputs_data_desc.push_back(mkldnn_utils::get_input_mkldnn_md(node, i));
} }
// Concat needs number of inputs plus 2 primitives; those two are for result and concat.
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto concat_index = mkldnn_emitter->reserve_primitive_space(nargs + 2);
size_t concat_dim =
(dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
auto concat_index =
mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
auto& deps = mkldnn_emitter->get_primitive_deps(concat_index); auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
auto functor = [&, arg_tensors, nargs, concat_index]( auto functor =
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { [&, concat_pd, inputs_data_desc, arg_tensors, nargs, concat_index](
for (size_t i = 0; i < nargs; i++) CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
{ if (ctx->first_iteration)
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[i], arg_tensors[i]); {
} mkldnn_emitter->build_concat(
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[nargs], out_tensor); concat_pd, inputs_data_desc, concat_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, concat_index); }
}; for (size_t i = 0; i < nargs; i++)
{
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[i], arg_tensors[i]);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[nargs], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, concat_index);
};
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
...@@ -81,11 +81,15 @@ namespace ngraph ...@@ -81,11 +81,15 @@ namespace ngraph
mkldnn::memory::format::goihw); mkldnn::memory::format::goihw);
} }
size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc); // ConvertLayout needs 3 primitives: input, result, and reorder.
size_t reorder_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index); auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
auto functor = [&, reorder_index](CPURuntimeContext* ctx, auto functor = [&, input_desc, result_desc, reorder_index](
CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_reorder(input_desc, result_desc, reorder_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index);
......
...@@ -43,13 +43,17 @@ namespace ngraph ...@@ -43,13 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto leaky_relu_desc = mkldnn_emitter->get_leaky_relu_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // LeakyRelu needs 3 primitives: input, result, and eltwise_forward.
auto leaky_relu_index = auto leaky_relu_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_leaky_relu(input_desc, result_desc, alpha);
auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index);
auto functor = [&, leaky_relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, leaky_relu_desc, leaky_relu_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_leaky_relu(leaky_relu_desc, leaky_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], input_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index);
......
...@@ -43,19 +43,17 @@ namespace ngraph ...@@ -43,19 +43,17 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto lrn_desc = mkldnn_emitter->get_lrn_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // LRN needs 3 primitives: input, result, and lrn_forward.
auto lrn_index = mkldnn_emitter->reserve_primitive_space(3);
auto lrn_index =
mkldnn_emitter->build_lrn_forward(input_data_desc,
result_desc,
static_cast<float>(lrn->get_alpha()),
static_cast<float>(lrn->get_beta()),
static_cast<float>(lrn->get_bias()),
static_cast<int>(lrn->get_nsize()));
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index); auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
functor = [&, lrn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
functor = [&, lrn_desc, lrn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_lrn_forward(lrn_desc, lrn_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index);
......
...@@ -54,10 +54,22 @@ namespace ngraph ...@@ -54,10 +54,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name()); auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_index = mkldnn_emitter->build_rnn<ngraph::op::Lstm>(node, args, out); auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto lstm_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index); auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
auto functor = [&, lstm_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) { auto functor = [&, lstm_desc, lstm_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(lstm_desc, lstm_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
This diff is collapsed.
...@@ -54,35 +54,26 @@ namespace ngraph ...@@ -54,35 +54,26 @@ namespace ngraph
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>( auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
dequantize->get_argument(1)); dequantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr) if (scale_const_op == nullptr)
{ {
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape()); auto scales_size = shape_size(args[1].get_shape());
size_t dequantize_index = // Dequantize needs 3 primitives: input, result, and reorder.
mkldnn_emitter->build_dequantization(node, input_desc, result_desc); size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, input_desc, result_desc, scales_size, dequantize_index]( functor = [&, input_desc, result_desc, scales_size, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration. // Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales don't change for the duration of the graph // Assumes the scales don't change for the duration of the graph
if (ctx->first_iteration) if (ctx->first_iteration)
{ {
mkldnn::primitive_attr attr;
vector<float> dyn_scales; vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor), dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size); static_cast<float*>(arg1_tensor) + scales_size);
attr.set_output_scales(0, dyn_scales); mkldnn_emitter->build_quantize_reorder(
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); input_desc, result_desc, dyn_scales, dequantize_index);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[dequantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
...@@ -92,11 +83,19 @@ namespace ngraph ...@@ -92,11 +83,19 @@ namespace ngraph
} }
else else
{ {
size_t dequantize_index = std::vector<float> scale = scale_const_op->get_vector<float>();
mkldnn_emitter->build_dequantization(node, input_desc, result_desc); std::vector<float> scales;
scales.push_back(scale[0]);
size_t dequantize_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
functor = [&, dequantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { functor = [&, input_desc, result_desc, scales, dequantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, dequantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
...@@ -243,25 +242,21 @@ namespace ngraph ...@@ -243,25 +242,21 @@ namespace ngraph
auto scale_const_op = auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1)); std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1));
std::vector<float> scales;
if (scale_const_op == nullptr) if (scale_const_op == nullptr)
{ {
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto scales_size = shape_size(args[1].get_shape()); auto scales_size = shape_size(args[1].get_shape());
// Dummy value while we wait for the actual values that are provided during // Quantize needs 3 primitives: input, result, and reorder.
// execution size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
scales.push_back(1.0f);
size_t quantize_index =
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, input_desc, result_desc, scales_size, quantize_index]( auto functor = [&, input_desc, result_desc, scales_size, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) { CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// Create MKLDNN reorder primitive during the first iteration. // Create MKLDNN reorder primitive during the first iteration.
// Assumes the scales don't change for the duration of the graph // Assumes the scales don't change for the duration of the graph
if (ctx->first_iteration) if (ctx->first_iteration)
{ {
mkldnn::primitive_attr attr;
vector<float> dyn_scales; vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor), dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size); static_cast<float*>(arg1_tensor) + scales_size);
...@@ -271,16 +266,8 @@ namespace ngraph ...@@ -271,16 +266,8 @@ namespace ngraph
} }
// quantize across first dim (mask=2^0) if dyn_scales is a vector // quantize across first dim (mask=2^0) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 1; const int mask = scales_size == 1 ? 0 : 1;
attr.set_output_scales(mask, dyn_scales); mkldnn_emitter->build_quantize_reorder(
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); input_desc, result_desc, dyn_scales, quantize_index, mask);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
{result_desc, executor::global_cpu_engine},
attr);
*ctx->mkldnn_primitives[quantize_index] =
mkldnn::reorder(reorder_desc,
*ctx->mkldnn_primitives[deps[0]],
*ctx->mkldnn_primitives[deps[1]]);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
...@@ -291,12 +278,18 @@ namespace ngraph ...@@ -291,12 +278,18 @@ namespace ngraph
else else
{ {
auto scale = scale_const_op->get_vector<float>(); auto scale = scale_const_op->get_vector<float>();
std::vector<float> scales;
scales.push_back(1.0 / scale[0]); scales.push_back(1.0 / scale[0]);
size_t quantize_index = size_t quantize_index = mkldnn_emitter->reserve_primitive_space(3);
mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index); auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
auto functor = [&, quantize_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, input_desc, result_desc, scales, quantize_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(
input_desc, result_desc, scales, quantize_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
......
...@@ -35,15 +35,24 @@ namespace ngraph ...@@ -35,15 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qavg_pool_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::QuantizedAvgPool>(
node, false);
// QuantizedAvgPool needs 3 primitives: input, result, and pooling_forward.
size_t qavg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
auto functor = [&, qavg_pool_index](CPURuntimeContext* ctx, auto functor = [&, qavg_pool_desc, qavg_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qavg_pool_desc, qavg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index);
......
...@@ -48,8 +48,7 @@ namespace ngraph ...@@ -48,8 +48,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>(node);
node, args, out);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node); ->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node);
...@@ -68,7 +67,7 @@ namespace ngraph ...@@ -68,7 +67,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>( mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -101,7 +100,7 @@ namespace ngraph ...@@ -101,7 +100,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionRelu>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionRelu>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>(
...@@ -119,7 +118,7 @@ namespace ngraph ...@@ -119,7 +118,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>( mkldnn_emitter->build_convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -154,7 +153,7 @@ namespace ngraph ...@@ -154,7 +153,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBias>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBias>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>(
...@@ -172,7 +171,7 @@ namespace ngraph ...@@ -172,7 +171,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
...@@ -213,7 +212,7 @@ namespace ngraph ...@@ -213,7 +212,7 @@ namespace ngraph
auto conv_desc = auto conv_desc =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBiasAdd>( ->get_convolution_forward_desc<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out); node);
auto conv_attr = auto conv_attr =
mkldnn_emitter mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>( ->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>(
...@@ -259,7 +258,7 @@ namespace ngraph ...@@ -259,7 +258,7 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
conv_attr.set_post_ops(new_pops); conv_attr.set_post_ops(new_pops);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
...@@ -305,7 +304,7 @@ namespace ngraph ...@@ -305,7 +304,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_desc = mkldnn_emitter->get_convolution_forward_desc< auto conv_desc = mkldnn_emitter->get_convolution_forward_desc<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node, args, out); ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
auto conv_attr = mkldnn_emitter->get_convolution_forward_attr< auto conv_attr = mkldnn_emitter->get_convolution_forward_attr<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node); ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true); size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
...@@ -349,7 +348,7 @@ namespace ngraph ...@@ -349,7 +348,7 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector // use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2; const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales); conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>( mkldnn_emitter->build_convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index); conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
} }
......
...@@ -35,16 +35,24 @@ namespace ngraph ...@@ -35,16 +35,24 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qmax_pool_desc =
size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node); mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::QuantizedMaxPool>(
node, false);
// QuantizedMaxPool needs 3 primitives: input, result, and pooling_forward.
size_t qmax_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index); auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
auto functor = [&, qmax_pool_index](CPURuntimeContext* ctx, auto functor = [&, qmax_pool_desc, qmax_pool_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(qmax_pool_desc, qmax_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index);
......
...@@ -40,15 +40,17 @@ namespace ngraph ...@@ -40,15 +40,17 @@ namespace ngraph
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto relu_desc = mkldnn_emitter->get_relu_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Relu needs 3 primitives: input, result, and eltwise_forward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(3);
size_t relu_index = mkldnn_emitter->build_relu_forward(input_desc, result_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx, auto functor = [&, relu_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_forward(relu_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index);
...@@ -74,16 +76,18 @@ namespace ngraph ...@@ -74,16 +76,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bwd_desc = mkldnn_emitter->get_relu_backward_desc(node);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); auto fwd_desc = mkldnn_emitter->get_relu_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // ReluBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(4);
size_t relu_index =
mkldnn_emitter->build_relu_backward(input_desc, delta_desc, result_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index); auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
auto functor = [&, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bwd_desc, fwd_desc, relu_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_backward(bwd_desc, fwd_desc, relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_fwd_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_fwd_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], delta_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -49,9 +49,22 @@ namespace ngraph ...@@ -49,9 +49,22 @@ namespace ngraph
auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name()); auto& dst_iter_tensor = external_function->get_tensor_data(out[1].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_index = mkldnn_emitter->build_rnn<ngraph::op::Rnn>(node, args, out); auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
// Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
auto rnn_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index); auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
auto functor = [&, rnn_index](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
auto functor = [&, rnn_desc, rnn_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(rnn_desc, rnn_index);
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_layer_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], src_iter_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], weights_layer_tensor);
......
...@@ -42,15 +42,17 @@ namespace ngraph ...@@ -42,15 +42,17 @@ namespace ngraph
auto out_shape = out[0].get_shape(); auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto sigmoid_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, false);
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Sigmoid needs 3 primitives: input, result, and eltwise_forward.
auto sigmoid_index = mkldnn_emitter->reserve_primitive_space(3);
auto sigmoid_index = mkldnn_emitter->build_sigmoid_forward(input_desc, out_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index); auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx, auto functor = [&, sigmoid_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_forward(sigmoid_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index);
...@@ -72,17 +74,18 @@ namespace ngraph ...@@ -72,17 +74,18 @@ namespace ngraph
auto out_shape = out[0].get_shape(); auto out_shape = out[0].get_shape();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fwd_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, true);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto bwd_desc = mkldnn_emitter->get_sigmoid_backward_desc(node);
auto delta_desc = mkldnn_utils::get_input_mkldnn_md(node, 1); // SigmoidBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
auto out_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); size_t sigmoid_index = mkldnn_emitter->reserve_primitive_space(4);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_backward(input_desc, delta_desc, out_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index); auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
auto functor = [&, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { auto functor = [&, bwd_desc, fwd_desc, sigmoid_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_backward(bwd_desc, fwd_desc, sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out_tensor);
......
...@@ -84,17 +84,22 @@ namespace ngraph ...@@ -84,17 +84,22 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
// Slice needs 3 primitives: input, result, and reorder.
auto slice_index = mkldnn_emitter->build_slice( auto slice_index = mkldnn_emitter->reserve_primitive_space(3);
input_desc, result_desc, lower_bounds, out_shape);
auto& deps = mkldnn_emitter->get_primitive_deps(slice_index); auto& deps = mkldnn_emitter->get_primitive_deps(slice_index);
auto functor = [&, slice_index](CPURuntimeContext* ctx, auto functor =
CPUExecutionContext* ectx) { [&, input_desc, result_desc, lower_bounds, out_shape, slice_index](
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); if (ctx->first_iteration)
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index); {
}; mkldnn_emitter->build_slice(
input_desc, result_desc, lower_bounds, out_shape, slice_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index);
};
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
...@@ -46,23 +46,18 @@ namespace ngraph ...@@ -46,23 +46,18 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
if (axes.size() != 1)
{
throw ngraph_error("MKLDNN supports softmax only across single axis");
}
int softmax_axis = static_cast<int>(*(axes.begin()));
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto softmax_desc = mkldnn_emitter->get_softmax_forward_desc(node);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0); // Softmax needs 3 primitives: input, result, and softmax_forward.
size_t softmax_index = mkldnn_emitter->reserve_primitive_space(3);
size_t softmax_index = mkldnn_emitter->build_softmax_forward(
input_desc, result_desc, softmax_axis);
auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index); auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index);
auto functor = [&, softmax_index](CPURuntimeContext* ctx, auto functor = [&, softmax_desc, softmax_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) { CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_softmax_forward(softmax_desc, softmax_index);
}
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment