Commit 0050950d authored by Pruthvi's avatar Pruthvi Committed by Scott Cyphers

update MKLDNN version from v0.14 -> v0.16 (#1370)

* - update MKLDNN version from v0.14 -> v0.15

* - added support to query tensor size from mkldnn desc for MKLDNN Op's
- moved tensor size calculation to tensor_view_layout
- populate mkldnn layout desc with MKLDNN queried size in the cpu_layout pass

* style fix

* - Fixed failing test cases

(cherry picked from commit d68d3738673c2081f458f9aa458c16361cc47657)

* style fix

* renamed tensor->logical_size() member function to tensor->size()

* Addressed PR comments

* - update mkldnn from v0.15 to v0.16
- update mkl SHA1 hash for lnx, mac, win

* style fix

* Addressed Scott's PR comments

* merge branch master with pruthvi/mkldnn_v0.15
parent 951e77b4
......@@ -43,19 +43,19 @@ endif()
# This section sets up MKL as an external project to be used later by MKLDNN
set(MKLURLROOT "https://github.com/intel/mkl-dnn/releases/download/v0.14/")
set(MKLVERSION "2018.0.3.20180406")
set(MKLURLROOT "https://github.com/intel/mkl-dnn/releases/download/v0.16/")
set(MKLVERSION "2019.0.20180710")
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
set(MKLPACKAGE "mklml_lnx_${MKLVERSION}.tgz")
set(MKL_SHA1_HASH aea0d9ce65773cfcf5d8292b8db553bde965fc8f)
set(MKL_SHA1_HASH e7c34105d486908b87b4b8c667c3a089f079ca51)
set(MKL_LIBS libiomp5.so libmklml_intel.so)
elseif (APPLE)
set(MKLPACKAGE "mklml_mac_${MKLVERSION}.tgz")
set(MKL_SHA1_HASH d76083fd5a79767a96572ad0e23e7f4c892818f2)
set(MKL_SHA1_HASH c873d2bd36a0100344d1aac1b1e56c8c3a43a845)
set(MKL_LIBS libmklml.dylib libiomp5.dylib)
elseif (WIN32)
set(MKLPACKAGE "mklml_win_${MKLVERSION}.zip")
set(MKL_SHA1_HASH d607ca92d7bfc101f0828c0b005098b75531669b)
set(MKL_SHA1_HASH 3767d9a1ad679d647b8c6edf1f97c767881d0c5c)
set(MKL_LIBS mklml.dll libiomp5md.dll)
endif()
set(MKLURL ${MKLURLROOT}${MKLPACKAGE})
......@@ -82,7 +82,7 @@ foreach(LIB ${MKL_LIBS})
endforeach()
set(MKLDNN_GIT_REPO_URL https://github.com/intel/mkl-dnn)
set(MKLDNN_GIT_TAG "0e7ca73")
set(MKLDNN_GIT_TAG "4e33378")
# The 'BUILD_BYPRODUCTS' argument was introduced in CMake 3.2.
if(${CMAKE_VERSION} VERSION_LESS 3.2)
......@@ -135,6 +135,7 @@ else()
CMAKE_ARGS
-DWITH_TEST=FALSE
-DWITH_EXAMPLE=FALSE
-DMKLDNN_ENABLE_CONCURRENT_EXEC=TRUE
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
......@@ -43,3 +43,8 @@ void descriptor::layout::TensorViewLayout::set_tensor_view_type(const element::T
m_element_type = element_type;
m_shape = shape;
}
// Returns the number of bytes required to store this tensor view:
// the logical element count reported by the layout multiplied by the
// byte width of a single element.
size_t descriptor::layout::TensorViewLayout::get_allocated_size()
{
    const size_t element_count = get_size();
    const size_t bytes_per_element = get_element_type().size();
    return element_count * bytes_per_element;
}
......@@ -50,7 +50,7 @@ namespace ngraph
///
/// When we support non-linear buffers, this will need to be something other than size_t.
virtual size_t get_size() = 0;
virtual size_t get_allocated_size();
/// Offset of an index; useful for slice implementation.
///
/// With non-linear buffers, this will need to be something other than size_t.
......
......@@ -15,6 +15,7 @@
*******************************************************************************/
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/node.hpp"
......@@ -29,12 +30,6 @@ descriptor::Tensor::Tensor(const element::Type& element_type,
, m_name{name}
, m_next_view_id{0}
{
size_t size = 1;
for (size_t s : primary_tensor_view->get_tensor_view_type()->get_shape())
{
size *= s;
}
m_size = size * m_element_type.size();
}
string descriptor::Tensor::make_tensor_name(const Node* node, size_t value_index)
......@@ -47,11 +42,6 @@ string descriptor::Tensor::get_next_view_name()
return m_name + "_TV" + to_string(m_next_view_id++);
}
// Returns the tensor's size in bytes, as cached in m_size
// (computed once from shape and element type at construction).
size_t descriptor::Tensor::size() const
{
    return m_size;
}
void descriptor::Tensor::set_pool_offset(size_t offset)
{
m_pool_offset = offset;
......@@ -62,6 +52,19 @@ size_t descriptor::Tensor::get_pool_offset() const
return m_pool_offset;
}
// Returns the tensor's size in bytes.
//
// When a layout has already been assigned to the primary tensor view, defer
// to the layout's allocated size — a backend layout (e.g. MKLDNN) may require
// more storage than the logical shape implies. Before a layout exists, fall
// back to logical element count times element byte width.
size_t descriptor::Tensor::size() const
{
    auto layout = m_primary_tensor_view->get_tensor_view_layout();
    if (layout)
    {
        return layout->get_allocated_size();
    }
    auto view_type = m_primary_tensor_view->get_tensor_view_type();
    return shape_size(view_type->get_shape()) * m_element_type.size();
}
void descriptor::Tensor::set_element_type(const element::Type& element_type)
{
m_element_type = element_type;
......
......@@ -49,24 +49,22 @@ private:
Tensor(const element::Type& element_type,
PrimaryTensorView* tensor_view,
const std::string& name);
std::string get_next_view_name();
public:
const std::string& get_name() const { return m_name; }
size_t size() const;
void set_pool_offset(size_t);
size_t size() const;
size_t get_pool_offset() const;
const element::Type& get_element_type() const { return m_element_type; }
void set_element_type(const element::Type& element_type);
static std::string make_tensor_name(const Node* node, size_t value_index);
PrimaryTensorView* get_primary_tensor_view() const { return m_primary_tensor_view; }
protected:
element::Type m_element_type;
PrimaryTensorView* m_primary_tensor_view;
std::string m_name;
size_t m_next_view_id;
size_t m_size;
size_t m_pool_offset;
};
......
......@@ -56,7 +56,6 @@ namespace ngraph
virtual Tensor& get_tensor() = 0;
virtual std::shared_ptr<const TensorViewType> get_value_type() const;
const std::string& get_name() const { return m_name; }
std::shared_ptr<const TensorViewType> get_tensor_view_type() const
{
......
......@@ -17,7 +17,6 @@
#include "cpu_layout_descriptor.hpp"
#include <algorithm>
#include <numeric>
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
namespace ngraph
......@@ -46,6 +45,9 @@ namespace ngraph
s *= shape[shape.size() - (i + 1)];
}
std::reverse(m_strides.begin(), m_strides.end());
auto tvt = tv.get_tensor_view_type();
m_mkldnn_memory_size =
shape_size(tvt->get_shape()) * tvt->get_element_type().size();
}
size_t LayoutDescriptor::get_index_offset(const std::vector<size_t>& indices)
......@@ -98,6 +100,27 @@ namespace ngraph
return true;
}
// Records the MKLDNN memory descriptor for this layout and refreshes the
// cached allocation size.
//
// MKLDNN may internally pad a tensor to produce blocked layouts, so the
// true storage requirement must be queried from a memory primitive
// descriptor rather than derived from the logical shape.
// See http://intel.github.io/mkl-dnn/understanding_memory_formats.html
//
// Throws ngraph_error if MKLDNN rejects the descriptor.
void LayoutDescriptor::set_mkldnn_md(const mkldnn::memory::desc md)
{
    m_mkldnn_md = md;
    try
    {
        mkldnn::memory::primitive_desc prim_desc{md, mkldnn_utils::global_cpu_engine};
        m_mkldnn_memory_size = prim_desc.get_size();
    }
    catch (const mkldnn::error& e)
    {
        throw ngraph_error(
            "error in computing mkldnn memory size from memory primitive desc: " +
            e.message);
    }
}
}
}
}
......@@ -39,6 +39,7 @@ namespace ngraph
LayoutDescriptor(const ngraph::descriptor::TensorView& tv);
~LayoutDescriptor() override {}
size_t get_size() override { return m_size; }
virtual size_t get_allocated_size() override { return m_mkldnn_memory_size; }
size_t get_offset() const { return m_offset; }
size_t get_index_offset(const std::vector<size_t>& indices) override;
......@@ -47,7 +48,7 @@ namespace ngraph
bool operator==(const TensorViewLayout& other) const override;
const mkldnn::memory::desc& get_mkldnn_md() const { return m_mkldnn_md; }
void set_mkldnn_md(const mkldnn::memory::desc md) { m_mkldnn_md = md; }
void set_mkldnn_md(const mkldnn::memory::desc md);
bool is_mkldnn_layout() const
{
return m_mkldnn_md.data.format != mkldnn::memory::format::format_undef;
......@@ -66,6 +67,7 @@ namespace ngraph
// Otherwise, physical layout is assumed to be in row-major
// format represented by m_strides
mkldnn::memory::desc m_mkldnn_md;
size_t m_mkldnn_memory_size;
};
typedef std::vector<std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor>>
......
......@@ -103,7 +103,6 @@ shared_ptr<Node> runtime::cpu::pass::CPULayout::insert_input_conversions(
{
auto layout = std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv);
layout->set_mkldnn_md(required_mds[index]);
auto new_node = std::shared_ptr<Node>(
new runtime::cpu::op::ConvertLayout(output.get_node(), output.get_index(), layout));
new_args.push_back(new_node);
......@@ -186,9 +185,7 @@ void runtime::cpu::pass::CPULayout::set_native_layouts(
if (!mkldnn_utils::compare_mkldnn_mds(cpu_tvl->get_mkldnn_md(), native_md))
{
auto layout = std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv);
layout->set_mkldnn_md(native_md);
auto new_node = std::shared_ptr<Node>(new runtime::cpu::op::ConvertLayout(
output.get_node(), output.get_index(), layout));
new_args.push_back(new_node);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment