From dc4acb4ea6a7c45e0ffea2c97388a92807332f68 Mon Sep 17 00:00:00 2001
From: Louis Feng <louis.feng@intel.com>
Date: Thu, 22 Feb 2018 16:01:31 -0800
Subject: [PATCH] Add MKLDNN forward emitter for convolution + bias.

---
 src/ngraph/runtime/cpu/cpu_emitter.cpp    | 117 +++++++++++++++++-----
 src/ngraph/runtime/cpu/mkldnn_emitter.cpp |  74 +++++++-------
 src/ngraph/runtime/cpu/mkldnn_emitter.hpp |   3 +-
 3 files changed, 132 insertions(+), 62 deletions(-)

diff --git a/src/ngraph/runtime/cpu/cpu_emitter.cpp b/src/ngraph/runtime/cpu/cpu_emitter.cpp
index 2dea139b8..5eebfbebe 100644
--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -86,6 +86,7 @@
 #include "ngraph/ops/tanh.hpp"
 #include "ngraph/runtime/cpu/cpu_emitter.hpp"
 #include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
+#include "ngraph/runtime/cpu/ops/conv_bias.hpp"
 #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
 #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
 #include "ngraph/types/element_type.hpp"
@@ -2048,36 +2049,23 @@ namespace ngraph
                         out[0], mkldnn::memory::format::nchw);
                     size_t conv_index = 0;
 
-                    if (!filter_dilated)
+                    // For dilation, MKLDNN wants to know how many elements to insert between, not how far
+                    // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
+                    Strides window_dilation_strides_adjusted;
+
+                    for (size_t s : convolution->get_window_dilation_strides())
                     {
-                        conv_index = mkldnn_emitter->build_convolution_forward(
-                            input_data_desc,
-                            weights_desc,
-                            result_desc,
-                            convolution->get_window_movement_strides(),
-                            convolution->get_padding_below(),
-                            convolution->get_padding_above());
+                        window_dilation_strides_adjusted.push_back(s - 1);
                     }
-                    else
-                    {
-                        // For dilation, MKLDNN wants to know how many elements to insert between, not how far
-                        // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
-                        Strides window_dilation_strides_adjusted;
-
-                        for (size_t s : convolution->get_window_dilation_strides())
-                        {
-                            window_dilation_strides_adjusted.push_back(s - 1);
-                        }
 
-                        conv_index = mkldnn_emitter->build_convolution_forward(
-                            input_data_desc,
-                            weights_desc,
-                            result_desc,
-                            convolution->get_window_movement_strides(),
-                            window_dilation_strides_adjusted,
-                            convolution->get_padding_below(),
-                            convolution->get_padding_above());
-                    }
+                    conv_index = mkldnn_emitter->build_convolution_forward(
+                        input_data_desc,
+                        weights_desc,
+                        result_desc,
+                        convolution->get_window_movement_strides(),
+                        window_dilation_strides_adjusted,
+                        convolution->get_padding_below(),
+                        convolution->get_padding_above());
 
                     auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
                     writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
@@ -2340,6 +2328,81 @@ namespace ngraph
                 }
             }
 
+            // Emits generated code that runs a fused convolution + bias through MKLDNN.
+            // args are {data, weights, bias}; out[0] receives the result.
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBias)
+            {
+                // NOTE(review): node is read through the Convolution interface; this is only
+                // well-defined if ConvolutionBias derives from Convolution -- confirm.
+                auto convolution = static_cast<const ngraph::op::Convolution*>(node);
+
+                const TensorViewWrapper& data = args[0];
+                const TensorViewWrapper& weights = args[1];
+                const TensorViewWrapper& bias = args[2];
+                const TensorViewWrapper& result = out[0];
+                const size_t data_rank = data.get_shape().size();
+                const size_t weights_rank = weights.get_shape().size();
+                const element::Type& elem_type = data.get_element_type();
+
+                bool data_dilated = false;
+                for (size_t s : convolution->get_data_dilation_strides())
+                {
+                    data_dilated = data_dilated || (s != 1);
+                }
+
+                // No fallback kernel exists, so reject everything the MKLDNN path cannot take.
+                if (data_dilated || data_rank != 4 || weights_rank != 4 ||
+                    !(elem_type == element::f32))
+                {
+                    throw ngraph_error(
+                        "ConvolutionBias requires rank-4 f32 data/weights and no data dilation; "
+                        "data rank: " + std::to_string(data_rank));
+                }
+
+                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
+                auto data_desc =
+                    mkldnn_emitter->build_memory_descriptor(data, mkldnn::memory::format::nchw);
+                auto weights_desc = mkldnn_emitter->build_memory_descriptor(
+                    weights, mkldnn::memory::format::oihw);
+                auto bias_desc =
+                    mkldnn_emitter->build_memory_descriptor(bias, mkldnn::memory::format::x);
+                auto result_desc = mkldnn_emitter->build_memory_descriptor(
+                    result, mkldnn::memory::format::nchw);
+
+                // For dilation, MKLDNN wants to know how many elements to insert between, not how
+                // far apart to space the elements like nGraph, so subtract 1 from each stride.
+                Strides window_dilation_strides_adjusted;
+                for (size_t s : convolution->get_window_dilation_strides())
+                {
+                    window_dilation_strides_adjusted.push_back(s - 1);
+                }
+
+                const size_t conv_index = mkldnn_emitter->build_convolution_forward(
+                    data_desc,
+                    weights_desc,
+                    bias_desc,
+                    result_desc,
+                    convolution->get_window_movement_strides(),
+                    window_dilation_strides_adjusted,
+                    convolution->get_padding_below(),
+                    convolution->get_padding_above());
+
+                // build_convolution_forward registers deps as {data, weights, bias, result}.
+                auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
+                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
+                       << data.get_name() << ");\n";
+                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
+                       << weights.get_name() << ");\n";
+                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2]) << ", "
+                       << bias.get_name() << ");\n";
+                writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3]) << ", "
+                       << result.get_name() << ");\n";
+
+                writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
+                       << to_string(conv_index) << ");\n";
+            }
+
             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::Not)
             {
diff --git a/src/ngraph/runtime/cpu/mkldnn_emitter.cpp b/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
index ffe9e59d5..821b4a6f8 100644
--- a/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
+++ b/src/ngraph/runtime/cpu/mkldnn_emitter.cpp
@@ -76,6 +76,7 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
                                                 const mkldnn::memory::desc& weights_desc,
                                                 const mkldnn::memory::desc& result_desc,
                                                 const ngraph::Strides& strides,
+                                                const ngraph::Strides& dilation_strides,
                                                 const ngraph::CoordinateDiff& padding_below,
                                                 const ngraph::CoordinateDiff& padding_above)
 
@@ -85,19 +86,20 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
     size_t result_index = build_memory_primitive(result_desc);
 
     size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
-        {{mkldnn::prop_kind::forward,
-          mkldnn::algorithm::convolution_direct,
-          input_data_desc,
-          weights_desc,
-          result_desc,
-          mkldnn::memory::dims(strides.begin(), strides.end()),
-          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
-          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
-          mkldnn::padding_kind::zero},
-         mkldnn_utils::global_cpu_engine},
-        *mkldnn_primitives[input_data_index],
-        *mkldnn_primitives[weights_index],
-        *mkldnn_primitives[result_index]));
+            {{mkldnn::prop_kind::forward,
+              mkldnn::algorithm::convolution_direct,
+              input_data_desc,
+              weights_desc,
+              result_desc,
+              mkldnn::memory::dims(strides.begin(), strides.end()),
+              mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
+              mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
+              mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
+              mkldnn::padding_kind::zero},
+             mkldnn_utils::global_cpu_engine},
+            *mkldnn_primitives[input_data_index],
+            *mkldnn_primitives[weights_index],
+            *mkldnn_primitives[result_index]));
 
     primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
     return conv_index;
@@ -105,6 +107,7 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
 
 size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                 const mkldnn::memory::desc& weights_desc,
+                                                const mkldnn::memory::desc& bias_desc,
                                                 const mkldnn::memory::desc& result_desc,
                                                 const ngraph::Strides& strides,
                                                 const ngraph::Strides& dilation_strides,
@@ -112,26 +115,29 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
                                                 const ngraph::CoordinateDiff& padding_above)
 
 {
-    size_t input_data_index = build_memory_primitive(input_data_desc);
-    size_t weights_index = build_memory_primitive(weights_desc);
-    size_t result_index = build_memory_primitive(result_desc);
-
-    size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
-        {{mkldnn::prop_kind::forward,
-          mkldnn::algorithm::convolution_direct,
-          input_data_desc,
-          weights_desc,
-          result_desc,
-          mkldnn::memory::dims(strides.begin(), strides.end()),
-          mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
-          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
-          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
-          mkldnn::padding_kind::zero},
-         mkldnn_utils::global_cpu_engine},
-        *mkldnn_primitives[input_data_index],
-        *mkldnn_primitives[weights_index],
-        *mkldnn_primitives[result_index]));
-
-    primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
+    const size_t input_data_index = build_memory_primitive(input_data_desc);
+    const size_t weights_index = build_memory_primitive(weights_desc);
+    const size_t bias_index = build_memory_primitive(bias_desc);
+    const size_t result_index = build_memory_primitive(result_desc);
+
+    const size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
+            {{mkldnn::prop_kind::forward,
+              mkldnn::algorithm::convolution_direct,
+              input_data_desc,
+              weights_desc,
+              bias_desc,
+              result_desc,
+              mkldnn::memory::dims(strides.begin(), strides.end()),
+              mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
+              mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
+              mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
+              mkldnn::padding_kind::zero},
+             mkldnn_utils::global_cpu_engine},
+            *mkldnn_primitives[input_data_index],
+            *mkldnn_primitives[weights_index],
+            *mkldnn_primitives[bias_index],
+            *mkldnn_primitives[result_index]));
+
+    primitive_deps[conv_index] = {input_data_index, weights_index, bias_index, result_index};
     return conv_index;
 }
diff --git a/src/ngraph/runtime/cpu/mkldnn_emitter.hpp b/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
index 2b4b664a9..c506ef04b 100644
--- a/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
+++ b/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
@@ -57,17 +57,18 @@ namespace ngraph
                                                  const mkldnn::memory::desc& weights_desc,
                                                  const mkldnn::memory::desc& result_desc,
                                                  const ngraph::Strides& strides,
+                                                 const ngraph::Strides& dilation_strides,
                                                  const ngraph::CoordinateDiff& padding_below,
                                                  const ngraph::CoordinateDiff& padding_above);
 
                 size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                  const mkldnn::memory::desc& weights_desc,
+                                                 const mkldnn::memory::desc& bias_desc,
                                                  const mkldnn::memory::desc& result_desc,
                                                  const ngraph::Strides& strides,
                                                  const ngraph::Strides& dilation_strides,
                                                  const ngraph::CoordinateDiff& padding_below,
                                                  const ngraph::CoordinateDiff& padding_above);
-
             private:
                 std::shared_ptr<CPU_ExternalFunction> external_function;
                 std::vector<mkldnn::primitive*> mkldnn_primitives;
-- 
2.18.0