Unverified Commit 233e4b1b authored by Jayaram Bobba's avatar Jayaram Bobba Committed by GitHub

Merge pull request #513 from NervanaSystems/jmenon/mkldnn-compile

MKLDNN Emitter
parents 96cabff0 e05a356e
......@@ -176,6 +176,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_tensor_view_wrapper.cpp
runtime/cpu/cpu_layout_descriptor.cpp
runtime/cpu/cpu_tracing.cpp
runtime/cpu/mkldnn_emitter.cpp
runtime/cpu/mkldnn_invoke.cpp
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/matmul_bias.cpp
......
......@@ -142,6 +142,8 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
{
ctx->op_durations = new int64_t[m_external_function->get_op_attrs().size()];
}
const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
ctx->mkldnn_primitives = mkldnn_emitter->get_mkldnn_primitives().data();
}
void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
......
......@@ -2036,95 +2036,59 @@ namespace ngraph
data_dilated = data_dilated || (s != 1);
}
// TODO(jmenon): MKLDNN streams should be static so we need to either implement
// codegen for statics or move primitive and stream construction out
// of the generated function and only generate code to run/rerun the stream
if (!filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
const string& et =
get_mkldnn_data_type(args[0].get_element_type().c_type_string());
writer << "{\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape)
<< "}, " << et << ", memory::format::oihw);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
<< args[0].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, "
<< args[1].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
writer
<< "convolution_forward conv = convolution_forward({"
<< "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
"weights_desc, result_desc, {"
<< join(convolution->get_window_movement_strides()) << "}, {"
<< join(convolution->get_padding_below()) << "}, {"
<< join(convolution->get_padding_above())
<< "}, padding_kind::zero}, cpu_engine}, "
<< "input_data, weights, result);\n";
writer << "stream s = stream(stream::kind::eager);\n"
<< "s.submit({conv}).wait();\n";
writer.indent--;
writer << "}\n";
}
else if (filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_emitter->build_memory_descriptor(
args[0], mkldnn::memory::format::nchw);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
args[1], mkldnn::memory::format::oihw);
auto result_desc = mkldnn_emitter->build_memory_descriptor(
out[0], mkldnn::memory::format::nchw);
size_t conv_index = 0;
for (size_t s : convolution->get_window_dilation_strides())
if (!filter_dilated)
{
window_dilation_strides_adjusted.push_back(s - 1);
conv_index = mkldnn_emitter->build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
convolution->get_padding_below(),
convolution->get_padding_above());
}
else
{
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
const string& et =
get_mkldnn_data_type(args[0].get_element_type().c_type_string());
writer << "{\n";
writer.indent++;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape)
<< "}, " << et << ", memory::format::oihw);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
<< "}, " << et << ", memory::format::nchw);\n";
conv_index = mkldnn_emitter->build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above());
}
writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
<< args[0].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, "
<< args[1].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
writer
<< "convolution_forward conv = convolution_forward({"
<< "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
"weights_desc, result_desc, {"
<< join(convolution->get_window_movement_strides()) << "}, {"
<< join(window_dilation_strides_adjusted) << "}, {"
<< join(convolution->get_padding_below()) << "}, {"
<< join(convolution->get_padding_above())
<< "}, padding_kind::zero}, cpu_engine}, "
<< "input_data, weights, result);\n";
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "stream s = stream(stream::kind::eager);\n"
<< "s.submit({conv}).wait();\n";
writer.indent--;
writer << "}\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
}
else
{
......
......@@ -249,6 +249,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
string function_name = m_function->get_name();
m_mkldnn_emitter.reset(new MKLDNNEmitter(shared_from_this()));
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
......@@ -284,6 +286,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
......
......@@ -31,6 +31,7 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/external_function.hpp"
namespace ngraph
......@@ -80,6 +81,11 @@ namespace ngraph
const LayoutDescriptorPtrs& get_result_layout_descriptors();
const std::vector<OpAttributes>& get_op_attrs() const { return m_op_attrs; }
// Accessor for the MKL-DNN emitter that builds and owns the MKL-DNN
// primitives for this function (created during compile()).
const std::unique_ptr<MKLDNNEmitter>& get_mkldnn_emitter() const
{
    return m_mkldnn_emitter;
}
protected:
void compile();
......@@ -115,6 +121,8 @@ namespace ngraph
LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors;
std::vector<OpAttributes> m_op_attrs;
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
};
}
}
......
......@@ -17,6 +17,11 @@
#include <chrono>
#include <cstdint>
namespace mkldnn
{
class primitive;
}
namespace ngraph
{
namespace runtime
......@@ -31,6 +36,7 @@ namespace ngraph
// Per-call-frame state handed to generated code at runtime.
struct CPURuntimeContext
{
    // Per-op execution timings. NOTE(review): appears to be allocated
    // conditionally in CPU_CallFrame::setup_runtime_context — confirm it
    // may be null before dereferencing.
    int64_t* op_durations;
    // Raw, non-owning view of MKLDNNEmitter's primitive vector; generated
    // code indexes into it via set_memory_ptr/mkldnn_invoke_primitive.
    mkldnn::primitive* const* mkldnn_primitives;
};
}
}
......
......@@ -69,3 +69,9 @@ bool runtime::cpu::TensorViewWrapper::is_output() const
{
return m_tensor_view->get_tensor().is_output();
}
// Returns the wrapped descriptor::TensorView (a copy of the shared_ptr;
// this wrapper retains its own reference).
const std::shared_ptr<descriptor::TensorView>
    runtime::cpu::TensorViewWrapper::get_tensor_view() const
{
    return m_tensor_view;
}
......@@ -45,6 +45,7 @@ public:
const std::string& get_name() const;
const std::string& get_type() const;
bool is_output() const;
const std::shared_ptr<descriptor::TensorView> get_tensor_view() const;
private:
std::shared_ptr<descriptor::TensorView> m_tensor_view;
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include "mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace ngraph::runtime::cpu;
// All primitives created so far, indexed by the value returned from
// insert_primitive(). CPU_CallFrame exposes this vector's data() to
// generated code through CPURuntimeContext.
const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives() const
{
    return mkldnn_primitives;
}
// Appends a primitive to the emitter's table and returns its index, which
// is the handle generated code uses to refer to it at runtime.
size_t MKLDNNEmitter::insert_primitive(mkldnn::primitive* primitive)
{
    // The new primitive lands at the current tail of the vector.
    const size_t index = mkldnn_primitives.size();
    mkldnn_primitives.push_back(primitive);
    return index;
}
// Indices of the memory primitives associated with the primitive at
// `index` (e.g. {input, weights, result} for a convolution). Throws
// std::out_of_range if no dependencies were recorded for `index`.
const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
{
    return primitive_deps.at(index);
}
// Builds an MKL-DNN memory descriptor for the tensor view using an
// explicitly requested layout format.
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw,
                                                            mkldnn::memory::format fmt) const
{
    // Translate the nGraph shape into MKL-DNN dims, then pair it with the
    // mapped element type and the requested format.
    const auto& shape = tvw.get_shape();
    mkldnn::memory::dims dims(shape.begin(), shape.end());
    return mkldnn::memory::desc(dims, mkldnn_utils::GetDataType(tvw.get_element_type()), fmt);
}
// Builds an MKL-DNN memory descriptor for the tensor view, taking the
// format from the layout already assigned to the view.
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw) const
{
    const auto layout = std::static_pointer_cast<LayoutDescriptor>(
        tvw.get_tensor_view()->get_tensor_view_layout());
    return build_memory_descriptor(tvw, layout->get_mkldnn_format());
}
// Constructs (but does not register) a memory primitive for the tensor
// view, with no data handle attached.
// NOTE(review): passes nullptr as the handle, while the overload below
// states the MKL-DNN C++ API rejects null handles — confirm this overload
// is actually usable/used.
mkldnn::memory MKLDNNEmitter::build_memory_primitive(const TensorViewWrapper& tvw) const
{
    return mkldnn::memory({build_memory_descriptor(tvw), mkldnn_utils::global_cpu_engine}, nullptr);
}
// Registers a memory primitive for `desc` and returns its index.
size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
{
    // The MKL-DNN C++ API (unlike the C API) insists on a non-null data
    // handle at construction time. Real buffers are bound at runtime via
    // set_memory_ptr(), so a known-invalid sentinel address is passed here
    // purely to get past that check.
    void* const sentinel = reinterpret_cast<void*>(0x42);
    return insert_primitive(
        new mkldnn::memory({desc, mkldnn_utils::global_cpu_engine}, sentinel));
}
// Registers a non-dilated forward convolution and its three memory
// primitives; returns the convolution's index. The memory primitives'
// data handles are bound at runtime (see get_primitive_deps()).
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                const mkldnn::memory::desc& weights_desc,
                                                const mkldnn::memory::desc& result_desc,
                                                const ngraph::Strides& strides,
                                                const ngraph::CoordinateDiff& padding_below,
                                                const ngraph::CoordinateDiff& padding_above)
{
    const size_t input_index = build_memory_primitive(input_data_desc);
    const size_t weights_index = build_memory_primitive(weights_desc);
    const size_t result_index = build_memory_primitive(result_desc);

    mkldnn::convolution_forward::desc fwd_desc(
        mkldnn::prop_kind::forward,
        mkldnn::algorithm::convolution_direct,
        input_data_desc,
        weights_desc,
        result_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
    mkldnn::convolution_forward::primitive_desc prim_desc(fwd_desc,
                                                          mkldnn_utils::global_cpu_engine);

    const size_t conv_index =
        insert_primitive(new mkldnn::convolution_forward(prim_desc,
                                                         *mkldnn_primitives[input_index],
                                                         *mkldnn_primitives[weights_index],
                                                         *mkldnn_primitives[result_index]));
    // Record which memory primitives this convolution reads/writes so
    // generated code can rebind their data handles before invocation.
    primitive_deps[conv_index] = {input_index, weights_index, result_index};
    return conv_index;
}
// Registers a dilated forward convolution and its three memory primitives;
// returns the convolution's index. `dilation_strides` uses MKL-DNN
// semantics (number of elements inserted between filter taps), which is
// the nGraph window dilation stride minus one — callers adjust.
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                const mkldnn::memory::desc& weights_desc,
                                                const mkldnn::memory::desc& result_desc,
                                                const ngraph::Strides& strides,
                                                const ngraph::Strides& dilation_strides,
                                                const ngraph::CoordinateDiff& padding_below,
                                                const ngraph::CoordinateDiff& padding_above)
{
    const size_t input_index = build_memory_primitive(input_data_desc);
    const size_t weights_index = build_memory_primitive(weights_desc);
    const size_t result_index = build_memory_primitive(result_desc);

    mkldnn::convolution_forward::desc fwd_desc(
        mkldnn::prop_kind::forward,
        mkldnn::algorithm::convolution_direct,
        input_data_desc,
        weights_desc,
        result_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
    mkldnn::convolution_forward::primitive_desc prim_desc(fwd_desc,
                                                          mkldnn_utils::global_cpu_engine);

    const size_t conv_index =
        insert_primitive(new mkldnn::convolution_forward(prim_desc,
                                                         *mkldnn_primitives[input_index],
                                                         *mkldnn_primitives[weights_index],
                                                         *mkldnn_primitives[result_index]));
    // Record which memory primitives this convolution reads/writes so
    // generated code can rebind their data handles before invocation.
    primitive_deps[conv_index] = {input_index, weights_index, result_index};
    return conv_index;
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include <mkldnn.hpp>
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPU_ExternalFunction;
class TensorViewWrapper;
// Builds and owns MKL-DNN primitives on behalf of the CPU backend.
// Primitives are identified by the index at which they were inserted;
// CPURuntimeContext exposes a raw view of the same vector so generated
// code can invoke primitives by index at runtime.
class MKLDNNEmitter
{
public:
    // The owning external function is stored as a weak_ptr: the external
    // function holds this emitter via unique_ptr and constructs it with
    // shared_from_this(), so a member shared_ptr would form an ownership
    // cycle and keep the external function alive forever.
    explicit MKLDNNEmitter(const std::shared_ptr<CPU_ExternalFunction>& ef)
        : external_function(ef)
    {
    }

    // All primitives created so far, indexed by insert_primitive() order.
    const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;

    // Takes ownership of `primitive` and returns its index.
    size_t insert_primitive(mkldnn::primitive* primitive);

    // Indices of the memory primitives the primitive at `index` uses.
    const std::vector<size_t>& get_primitive_deps(size_t index) const;

    // TODO(jmenon): Get rid of TensorViewWrappers at some point

    // Builds a memory descriptor for a tensor view in an explicit format.
    mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw,
                                                 mkldnn::memory::format fmt) const;
    // Builds a memory descriptor using the view's assigned layout format.
    mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw) const;
    // Constructs (without registering) a memory primitive for a view.
    mkldnn::memory build_memory_primitive(const TensorViewWrapper& tvw) const;
    // Registers a memory primitive for `desc`; returns its index.
    size_t build_memory_primitive(const mkldnn::memory::desc& desc);

    // Registers a non-dilated forward convolution; returns its index.
    size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                     const mkldnn::memory::desc& weights_desc,
                                     const mkldnn::memory::desc& result_desc,
                                     const ngraph::Strides& strides,
                                     const ngraph::CoordinateDiff& padding_below,
                                     const ngraph::CoordinateDiff& padding_above);
    // Registers a dilated forward convolution. `dilation_strides` follows
    // MKL-DNN semantics (elements inserted between taps, i.e. nGraph
    // window dilation stride minus one).
    size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                     const mkldnn::memory::desc& weights_desc,
                                     const mkldnn::memory::desc& result_desc,
                                     const ngraph::Strides& strides,
                                     const ngraph::Strides& dilation_strides,
                                     const ngraph::CoordinateDiff& padding_below,
                                     const ngraph::CoordinateDiff& padding_above);

private:
    // Non-owning back-reference; see constructor comment.
    std::weak_ptr<CPU_ExternalFunction> external_function;
    std::vector<mkldnn::primitive*> mkldnn_primitives;
    // NOTE(review): currently unused by any emitter method — confirm
    // before relying on it.
    std::vector<mkldnn::stream> mkldnn_streams;
    // Maps a primitive index to the indices of its memory primitives.
    std::unordered_map<size_t, std::vector<size_t>> primitive_deps;
};
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <mkldnn.hpp>
#include "mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
// Process-wide MKL-DNN CPU engine (device 0), shared by all emitted
// primitives; declared extern in mkldnn_utils.hpp.
mkldnn::engine ngraph::runtime::cpu::mkldnn_utils::global_cpu_engine(mkldnn::engine::cpu, 0);
// C-linkage entry point called from generated code: rebinds the data
// handle of the memory primitive at `primitive_index` to `ptr` so the
// primitive reads/writes the caller's buffer.
extern "C" void ngraph::runtime::cpu::mkldnn_utils::set_memory_ptr(CPURuntimeContext* ctx,
                                                                   size_t primitive_index,
                                                                   void* ptr)
{
    // The primitive stored at this index is always an mkldnn::memory
    // (see MKLDNNEmitter::build_memory_primitive), so the downcast is safe.
    auto* mem = static_cast<mkldnn::memory*>(ctx->mkldnn_primitives[primitive_index]);
    mem->set_data_handle(ptr);
}
// C-linkage entry point called from generated code: executes the single
// primitive at `primitive_index` and blocks until it completes.
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(CPURuntimeContext* ctx,
                                                                            size_t primitive_index)
{
    // Submit on an eager stream and wait synchronously.
    mkldnn::stream exec_stream{mkldnn::stream::kind::eager};
    exec_stream.submit({*ctx->mkldnn_primitives[primitive_index]}).wait();
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstddef>
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            struct CPURuntimeContext;

            namespace mkldnn_utils
            {
                // C-linkage entry points invoked from generated code.

                // Rebinds the data handle of the memory primitive at
                // `primitive_index` in `ctx` to `ptr`.
                extern "C" void
                    set_memory_ptr(CPURuntimeContext* ctx, size_t primitive_index, void* ptr);
                // Runs the primitive at `primitive_index` and waits for
                // it to complete.
                extern "C" void mkldnn_invoke_primitive(CPURuntimeContext* ctx,
                                                        size_t primitive_index);
            }
        }
    }
}
......@@ -17,6 +17,7 @@
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include "ngraph/node.hpp"
......@@ -37,7 +38,7 @@ namespace ngraph
{
#define TI(x) std::type_index(typeid(x))
const std::unordered_set<std::type_index> s_op_registry{
static const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::BatchNorm),
......@@ -47,6 +48,28 @@ namespace ngraph
TI(ngraph::op::MaxPool),
TI(ngraph::op::MaxPoolBackprop)};
// Maps nGraph element C type names to MKL-DNN data types. Element types
// MKL-DNN cannot represent (double, 64-bit integers, most unsigned types)
// map to data_undef and are rejected by GetDataType below.
// NOTE(review): "char" maps to s8, i.e. assumes char is signed — confirm
// on platforms where char is unsigned.
static const std::unordered_map<std::string, const mkldnn::memory::data_type>
    s_data_type_map{{"char", mkldnn::memory::data_type::s8},
                    {"float", mkldnn::memory::data_type::f32},
                    {"double", mkldnn::memory::data_type::data_undef},
                    {"int8_t", mkldnn::memory::data_type::s8},
                    {"int16_t", mkldnn::memory::data_type::s16},
                    {"int32_t", mkldnn::memory::data_type::s32},
                    {"int64_t", mkldnn::memory::data_type::data_undef},
                    {"uint8_t", mkldnn::memory::data_type::u8},
                    {"uint16_t", mkldnn::memory::data_type::data_undef},
                    {"uint32_t", mkldnn::memory::data_type::data_undef},
                    {"uint64_t", mkldnn::memory::data_type::data_undef}};
// Translates an nGraph element type to the corresponding MKL-DNN data
// type. Throws ngraph_error when the element type has no usable MKL-DNN
// representation.
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et)
{
    auto entry = s_data_type_map.find(et.c_type_string());
    if (entry != s_data_type_map.end() &&
        entry->second != mkldnn::memory::data_type::data_undef)
    {
        return entry->second;
    }
    throw ngraph_error("No MKLDNN data type exists for the given element type");
}
bool IsMKLDNNOp(ngraph::Node& op)
{
return (s_op_registry.find(TI(op)) != s_op_registry.end());
......
......@@ -16,15 +16,11 @@
#pragma once
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include <mkldnn.hpp>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
......@@ -34,7 +30,12 @@ namespace ngraph
{
namespace mkldnn_utils
{
extern mkldnn::engine global_cpu_engine;
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et);
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment