From dc4acb4ea6a7c45e0ffea2c97388a92807332f68 Mon Sep 17 00:00:00 2001
From: Louis Feng <louis.feng@intel.com>
Date: Thu, 22 Feb 2018 16:01:31 -0800
Subject: [PATCH] added conv+bias forward mkldnn.

cpu_emitter.cpp:
  * Include runtime/cpu/ops/conv_bias.hpp.
  * Convolution emitter: always compute the adjusted window dilation
    strides (nGraph stride - 1) and call the dilation-aware
    build_convolution_forward(); the separate non-dilated call is removed.
  * Add an emitter for op::ConvolutionBias. When the data is not dilated,
    data and weights are rank 4 and the element type is f32, it builds
    nchw/oihw/x/nchw memory descriptors for data/weights/bias/result,
    emits set_memory_ptr() for the four dependencies and invokes the
    primitive. Any other configuration throws ngraph_error.

mkldnn_emitter.{hpp,cpp}:
  * The bias-less build_convolution_forward() overload now takes
    dilation_strides.
  * The other overload now takes bias_desc, creates a bias memory
    primitive and records {input, weights, bias, result} as the
    primitive dependencies.

---
Review notes (ignored by git-am):
  * The ConvolutionBias emitter now casts the node to
    ngraph::op::ConvolutionBias; the previous revision of this patch cast
    it to ngraph::op::Convolution.
  * The fallback error text now names every precondition the guard checks
    instead of only the data rank.
  * Line breaks were restored from a whitespace-collapsed copy. Leading
    indentation is reconstructed, not original; verify against the tree
    or apply with --ignore-whitespace. The index hashes below predate the
    two fixes above.
  * NOTE(review): confirm that `filter_dilated` in the Convolution emitter
    is still used after the non-dilated branch is removed.

 src/ngraph/runtime/cpu/cpu_emitter.cpp    | 117 +++++++++++++++++-----
 src/ngraph/runtime/cpu/mkldnn_emitter.cpp |  74 +++++++-------
 src/ngraph/runtime/cpu/mkldnn_emitter.hpp |   3 +-
 3 files changed, 132 insertions(+), 62 deletions(-)

diff --git a/src/ngraph/runtime/cpu/cpu_emitter.cpp b/src/ngraph/runtime/cpu/cpu_emitter.cpp
index 2dea139b8..5eebfbebe 100644
--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -86,6 +86,7 @@
 #include "ngraph/ops/tanh.hpp"
 #include "ngraph/runtime/cpu/cpu_emitter.hpp"
 #include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
+#include "ngraph/runtime/cpu/ops/conv_bias.hpp"
 #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
 #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
 #include "ngraph/types/element_type.hpp"
@@ -2048,36 +2049,23 @@ namespace ngraph
                         out[0], mkldnn::memory::format::nchw);
                     size_t conv_index = 0;
 
-                    if (!filter_dilated)
+                    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
+                    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
+                    Strides window_dilation_strides_adjusted;
+
+                    for (size_t s : convolution->get_window_dilation_strides())
                     {
-                        conv_index = mkldnn_emitter->build_convolution_forward(
-                            input_data_desc,
-                            weights_desc,
-                            result_desc,
-                            convolution->get_window_movement_strides(),
-                            convolution->get_padding_below(),
-                            convolution->get_padding_above());
+                        window_dilation_strides_adjusted.push_back(s - 1);
                     }
-                    else
-                    {
-                        // For dilation, MKLDNN wants to know how many elements to insert between, not how far
-                        // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
-                        Strides window_dilation_strides_adjusted;
-
-                        for (size_t s : convolution->get_window_dilation_strides())
-                        {
-                            window_dilation_strides_adjusted.push_back(s - 1);
-                        }
 
-                        conv_index = mkldnn_emitter->build_convolution_forward(
-                            input_data_desc,
-                            weights_desc,
-                            result_desc,
-                            convolution->get_window_movement_strides(),
-                            window_dilation_strides_adjusted,
-                            convolution->get_padding_below(),
-                            convolution->get_padding_above());
-                    }
+                    conv_index = mkldnn_emitter->build_convolution_forward(
+                        input_data_desc,
+                        weights_desc,
+                        result_desc,
+                        convolution->get_window_movement_strides(),
+                        window_dilation_strides_adjusted,
+                        convolution->get_padding_below(),
+                        convolution->get_padding_above());
 
                     auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
                     writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
@@ -2340,6 +2328,81 @@ namespace ngraph
                 }
             }
 
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBias)
+            {
+                auto convolution = static_cast<const ngraph::op::ConvolutionBias*>(node);
+
+                const TensorViewWrapper& data = args[0];
+                const TensorViewWrapper& weights = args[1];
+                const TensorViewWrapper& bias = args[2];
+                const TensorViewWrapper& result = out[0];
+                const vector<size_t>& data_shape = data.get_shape();
+                const vector<size_t>& weights_shape = weights.get_shape();
+                const vector<size_t>& bias_shape = bias.get_shape();
+                const vector<size_t>& result_shape = result.get_shape();
+                const size_t data_rank = data_shape.size();
+                const size_t weights_rank = weights_shape.size();
+                const element::Type& elem_type = data.get_element_type();
+
+                bool data_dilated = false;
+                for (size_t s : convolution->get_data_dilation_strides())
+                {
+                    data_dilated = data_dilated || (s != 1);
+                }
+
+                if (!data_dilated && data_rank == 4 && weights_rank == 4 &&
+                    elem_type == element::f32)
+                {
+                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
+                    auto data_desc = mkldnn_emitter->build_memory_descriptor(
+                        data, mkldnn::memory::format::nchw);
+                    auto weights_desc = mkldnn_emitter->build_memory_descriptor(
+                        weights, mkldnn::memory::format::oihw);
+                    auto bias_desc = mkldnn_emitter->build_memory_descriptor(
+                        bias, mkldnn::memory::format::x);
+                    auto result_desc = mkldnn_emitter->build_memory_descriptor(
+                        result, mkldnn::memory::format::nchw);
+                    size_t conv_index = 0;
+
+                    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
+                    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
+                    Strides window_dilation_strides_adjusted;
+
+                    for (size_t s : convolution->get_window_dilation_strides())
+                    {
+                        window_dilation_strides_adjusted.push_back(s - 1);
+                    }
+
+                    conv_index = mkldnn_emitter->build_convolution_forward(
+                        data_desc,
+                        weights_desc,
+                        bias_desc,
+                        result_desc,
+                        convolution->get_window_movement_strides(),
+                        window_dilation_strides_adjusted,
+                        convolution->get_padding_below(),
+                        convolution->get_padding_above());
+
+                    auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
+                           << ", " << data.get_name() << ");\n";
+                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
+                           << ", " << weights.get_name() << ");\n";
+                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
+                           << ", " << bias.get_name() << ");\n";
+                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
+                           << ", " << result.get_name() << ");\n";
+
+                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
+                           << to_string(conv_index) << ");\n";
+                }
+                else
+                {
+                    throw ngraph_error("ConvolutionBias is only supported for f32 4D data and 4D weights without data dilation (data rank: " + std::to_string(data_rank) + ", weights rank: " + std::to_string(weights_rank) + ")");
+                }
+            }
+
             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::Not)
             {
diff --git a/src/ngraph/runtime/cpu/mkldnn_emitter.cpp b/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
index ffe9e59d5..821b4a6f8 100644
--- a/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
+++ b/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
@@ -76,6 +76,7 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
                                                 const mkldnn::memory::desc& weights_desc,
                                                 const mkldnn::memory::desc& result_desc,
                                                 const ngraph::Strides& strides,
+                                                const ngraph::Strides& dilation_strides,
                                                 const ngraph::CoordinateDiff& padding_below,
                                                 const ngraph::CoordinateDiff& padding_above)
 
@@ -85,19 +86,20 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
     size_t result_index = build_memory_primitive(result_desc);
 
     size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
-        {{mkldnn::prop_kind::forward,
-          mkldnn::algorithm::convolution_direct,
-          input_data_desc,
-          weights_desc,
-          result_desc,
-          mkldnn::memory::dims(strides.begin(), strides.end()),
-          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
-          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
-          mkldnn::padding_kind::zero},
-         mkldnn_utils::global_cpu_engine},
-        *mkldnn_primitives[input_data_index],
-        *mkldnn_primitives[weights_index],
-        *mkldnn_primitives[result_index]));
+            {{mkldnn::prop_kind::forward,
+              mkldnn::algorithm::convolution_direct,
+              input_data_desc,
+              weights_desc,
+              result_desc,
+              mkldnn::memory::dims(strides.begin(), strides.end()),
+              mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
+              mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
+              mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
+              mkldnn::padding_kind::zero},
+             mkldnn_utils::global_cpu_engine},
+            *mkldnn_primitives[input_data_index],
+            *mkldnn_primitives[weights_index],
+            *mkldnn_primitives[result_index]));
 
     primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
     return conv_index;
@@ -105,6 +107,7 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
 
 size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                 const mkldnn::memory::desc& weights_desc,
+                                                const mkldnn::memory::desc& bias_desc,
                                                 const mkldnn::memory::desc& result_desc,
                                                 const ngraph::Strides& strides,
                                                 const ngraph::Strides& dilation_strides,
@@ -112,26 +115,29 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
                                                 const ngraph::CoordinateDiff& padding_above)
 
 {
-    size_t input_data_index = build_memory_primitive(input_data_desc);
-    size_t weights_index = build_memory_primitive(weights_desc);
-    size_t result_index = build_memory_primitive(result_desc);
-
-    size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
-            {{mkldnn::prop_kind::forward,
-              mkldnn::algorithm::convolution_direct,
-              input_data_desc,
-              weights_desc,
-              result_desc,
-              mkldnn::memory::dims(strides.begin(), strides.end()),
-              mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
-              mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
-              mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
-              mkldnn::padding_kind::zero},
-             mkldnn_utils::global_cpu_engine},
-            *mkldnn_primitives[input_data_index],
-            *mkldnn_primitives[weights_index],
-            *mkldnn_primitives[result_index]));
-
-    primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
+    const size_t input_data_index = build_memory_primitive(input_data_desc);
+    const size_t weights_index = build_memory_primitive(weights_desc);
+    const size_t bias_index = build_memory_primitive(bias_desc);
+    const size_t result_index = build_memory_primitive(result_desc);
+
+    const size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
+        {{mkldnn::prop_kind::forward,
+          mkldnn::algorithm::convolution_direct,
+          input_data_desc,
+          weights_desc,
+          bias_desc,
+          result_desc,
+          mkldnn::memory::dims(strides.begin(), strides.end()),
+          mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
+          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
+          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
+          mkldnn::padding_kind::zero},
+         mkldnn_utils::global_cpu_engine},
+        *mkldnn_primitives[input_data_index],
+        *mkldnn_primitives[weights_index],
+        *mkldnn_primitives[bias_index],
+        *mkldnn_primitives[result_index]));
+
+    primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
     return conv_index;
 }
diff --git a/src/ngraph/runtime/cpu/mkldnn_emitter.hpp b/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
index 2b4b664a9..c506ef04b 100644
--- a/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
+++ b/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
@@ -57,17 +57,18 @@ namespace ngraph
                                                  const mkldnn::memory::desc& weights_desc,
                                                  const mkldnn::memory::desc& result_desc,
                                                  const ngraph::Strides& strides,
+                                                 const ngraph::Strides& dilation_strides,
                                                  const ngraph::CoordinateDiff& padding_below,
                                                  const ngraph::CoordinateDiff& padding_above);
 
                 size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                  const mkldnn::memory::desc& weights_desc,
+                                                 const mkldnn::memory::desc& bias_desc,
                                                  const mkldnn::memory::desc& result_desc,
                                                  const ngraph::Strides& strides,
                                                  const ngraph::Strides& dilation_strides,
                                                  const ngraph::CoordinateDiff& padding_below,
                                                  const ngraph::CoordinateDiff& padding_above);
-
             private:
                 std::shared_ptr<CPU_ExternalFunction> external_function;
                 std::vector<mkldnn::primitive*> mkldnn_primitives;
-- 
2.18.0