Commit d37fa712 authored by Jaikrishnan Menon, committed by Scott Cyphers

DEX Part 3 (#1184)

* CPU Direct Execution: Implement ConvertLayout and refactor

* CPU Direct Execution: Implement Convolution
parent 4cd2c602
@@ -27,6 +27,8 @@ set(SRC
cpu_tensor_view_wrapper.cpp
cpu_tensor_view.cpp
cpu_tracing.cpp
builder/convert_layout.cpp
builder/convolution.cpp
kernel/eigen_thread_pool.cpp
kernel/pad.cpp
kernel/reduce_max.cpp
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/convert_layout.hpp"

#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

using namespace std;
using namespace ngraph;

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::runtime::cpu::op::ConvertLayout)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                auto& arg_tensor = tensor_data[args[0].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto input_tvl = node->get_inputs()[0]
                                     .get_output()
                                     .get_tensor_view()
                                     ->get_tensor_view_layout();
                auto input_cpu_tvl =
                    dynamic_pointer_cast<runtime::cpu::LayoutDescriptor>(input_tvl);
                auto input_format = input_cpu_tvl->get_mkldnn_format();

                // Reorder the input shape if the layout descriptor carries a
                // non-identity axis order
                auto input_axis_order = input_cpu_tvl->get_axis_order();
                Shape input_shape(input_axis_order.size());
                for (size_t idx = 0; idx < input_axis_order.size(); idx++)
                {
                    input_shape[idx] = args[0].get_shape()[input_axis_order[idx]];
                }

                auto output_tvl = node->get_output_tensor_view(0)->get_tensor_view_layout();
                auto output_format =
                    dynamic_cast<runtime::cpu::LayoutDescriptor&>(*output_tvl)
                        .get_mkldnn_format();

                // MKLDNN relies on format names for selecting optimized kernel
                // implementations. As a stopgap until it moves to canonicalized
                // layouts, reinterpret a plain nchw descriptor as oihw whenever
                // the other side uses a filter format, so the reorder picks the
                // filter-aware implementation.
                if (input_format == mkldnn::memory::format::nchw &&
                    runtime::cpu::mkldnn_utils::is_mkldnn_filter_format(output_format))
                {
                    input_format = mkldnn::memory::format::oihw;
                }
                if (output_format == mkldnn::memory::format::nchw &&
                    runtime::cpu::mkldnn_utils::is_mkldnn_filter_format(input_format))
                {
                    output_format = mkldnn::memory::format::oihw;
                }

                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                auto input_desc = mkldnn_emitter->build_memory_descriptor(
                    input_shape, args[0].get_element_type(), input_format);
                auto result_desc = mkldnn_emitter->build_memory_descriptor(out[0], output_format);
                size_t reorder_index = mkldnn_emitter->build_reorder(input_desc, result_desc);

                // Tensor data slots and primitive deps are captured by reference,
                // so the buffer pointers are resolved at call time, not build time.
                auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
                auto functor = [&, reorder_index](CPURuntimeContext* ctx) {
                    cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
                    cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
                    cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index);
                };
                functors.emplace_back(functor);
            }
        }
    }
}
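For orientation: direct execution (DEX) turns each op into a functor like the one built above, and running the compiled function is just walking the functor list. A minimal sketch of that loop, with names simplified for illustration (not the exact ngraph internals):

#include <functional>
#include <vector>

struct CPURuntimeContext; // opaque per-invocation state (mkldnn primitives, etc.)

using CPUKernelFunctor = std::function<void(CPURuntimeContext*)>;

// Each builder appends one functor; executing the graph invokes them in
// topological order against the shared runtime context.
void execute(const std::vector<CPUKernelFunctor>& functors, CPURuntimeContext* ctx)
{
    for (const auto& functor : functors)
    {
        functor(ctx);
    }
}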
@@ -98,7 +98,6 @@
#include "ngraph/runtime/cpu/kernel/multiply.hpp"
#include "ngraph/runtime/cpu/kernel/relu.hpp"
#include "ngraph/runtime/cpu/kernel/result.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
@@ -119,53 +118,6 @@
using namespace std;
using namespace ngraph;
// Per-type kernel selection macro: expands to an if/else chain over the
// element type and assigns the matching instantiation of K to KV
#define SELECT_KERNEL(KV, ET, K) \
    if (ET == element::boolean) \
    { \
        KV = K<char>; \
    } \
    else if (ET == element::f32) \
    { \
        KV = K<float>; \
    } \
    else if (ET == element::f64) \
    { \
        KV = K<double>; \
    } \
    else if (ET == element::i8) \
    { \
        KV = K<int8_t>; \
    } \
    else if (ET == element::i16) \
    { \
        KV = K<int16_t>; \
    } \
    else if (ET == element::i32) \
    { \
        KV = K<int32_t>; \
    } \
    else if (ET == element::i64) \
    { \
        KV = K<int64_t>; \
    } \
    else if (ET == element::u8) \
    { \
        KV = K<uint8_t>; \
    } \
    else if (ET == element::u16) \
    { \
        KV = K<uint16_t>; \
    } \
    else if (ET == element::u32) \
    { \
        KV = K<uint32_t>; \
    } \
    else if (ET == element::u64) \
    { \
        KV = K<uint64_t>; \
    }
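To show the intent of SELECT_KERNEL, here is a hypothetical kernel template and call site; the kernel name, its body, and the std::function signature are illustrative, not the actual ngraph kernels:

#include <cstddef>
#include <functional>

// Hypothetical kernel template; the real kernels live under runtime/cpu/kernel.
template <typename T>
void my_kernel(void* in, void* out, size_t count)
{
    auto src = static_cast<T*>(in);
    auto dst = static_cast<T*>(out);
    for (size_t i = 0; i < count; i++)
    {
        dst[i] = src[i];
    }
}

// At a builder call site, ET would be e.g. args[0].get_element_type():
//     std::function<void(void*, void*, size_t)> kernel;
//     SELECT_KERNEL(kernel, args[0].get_element_type(), my_kernel);
//     // kernel now holds my_kernel<float>, my_kernel<int64_t>, ... as appropriate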
#define BUILD_UNARY_ELEMWISE_FUNCTOR(OP) \
    auto& functors = external_function->get_functors(); \
    auto& tensor_data = external_function->get_tensor_data(); \
@@ -419,6 +371,14 @@ namespace ngraph
        {TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop},
        {TI(ngraph::op::Abs), &runtime::cpu::Builder::build<ngraph::op::Abs>},
        {TI(ngraph::op::Ceiling), &runtime::cpu::Builder::build<ngraph::op::Ceiling>},
        {TI(ngraph::runtime::cpu::op::ConvertLayout),
         &runtime::cpu::Builder::build<ngraph::runtime::cpu::op::ConvertLayout>},
        {TI(ngraph::op::Convolution),
         &runtime::cpu::Builder::build<ngraph::op::Convolution>},
        {TI(ngraph::op::ConvolutionBackpropData),
         &runtime::cpu::Builder::build<ngraph::op::ConvolutionBackpropData>},
        {TI(ngraph::op::ConvolutionBackpropFilters),
         &runtime::cpu::Builder::build<ngraph::op::ConvolutionBackpropFilters>},
        {TI(ngraph::op::Relu), &runtime::cpu::Builder::build<ngraph::op::Relu>},
        {TI(ngraph::op::Result), &runtime::cpu::Builder::build<ngraph::op::Result>},
        {TI(ngraph::op::MatmulBias), &runtime::cpu::Builder::build<ngraph::op::MatmulBias>},
......
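The entries above populate a map from a node's concrete C++ type to its builder function. A minimal standalone sketch of that dispatch pattern, assuming TI(x) is shorthand for std::type_index(typeid(x)) and with the builder signature simplified for illustration:

#include <functional>
#include <typeindex>
#include <unordered_map>

struct Node { virtual ~Node() = default; };       // stand-in for ngraph::Node
struct Convolution : Node {};                     // stand-in for ngraph::op::Convolution

using BuildFn = std::function<void(const Node&)>; // real builders take more arguments

int main()
{
    std::unordered_map<std::type_index, BuildFn> build_dispatcher{
        {std::type_index(typeid(Convolution)),
         [](const Node&) { /* emit the convolution functor here */ }},
    };

    Convolution node;
    auto it = build_dispatcher.find(std::type_index(typeid(node)));
    if (it != build_dispatcher.end())
    {
        it->second(node); // dispatch to the builder registered for this node type
    }
}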
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once

#include "ngraph/runtime/reference/convolution.hpp"
#include "ngraph/shape.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernel
            {
                // Thin type-erased wrapper: cast the void* buffers to the
                // element type and forward to the reference implementation.
                template <typename ElementType>
                void convolution(void* input0,
                                 void* input1,
                                 void* output,
                                 const Shape& arg0_shape,
                                 const Shape& arg1_shape,
                                 const Shape& result_shape,
                                 const Strides& window_movement_strides,
                                 const Strides& window_dilation_strides,
                                 const CoordinateDiff& padding_below,
                                 const CoordinateDiff& padding_above,
                                 const Strides& data_dilation_strides,
                                 size_t batch_axis_data,
                                 size_t input_channel_axis_data,
                                 size_t input_channel_axis_filters,
                                 size_t output_channel_axis_filters,
                                 size_t batch_axis_result,
                                 size_t output_channel_axis_result,
                                 bool rotate_filter)
                {
                    reference::convolution<ElementType>(
                        static_cast<const ElementType*>(input0),
                        static_cast<const ElementType*>(input1),
                        static_cast<ElementType*>(output),
                        arg0_shape,
                        arg1_shape,
                        result_shape,
                        window_movement_strides,
                        window_dilation_strides,
                        padding_below,
                        padding_above,
                        data_dilation_strides,
                        batch_axis_data,
                        input_channel_axis_data,
                        input_channel_axis_filters,
                        output_channel_axis_filters,
                        batch_axis_result,
                        output_channel_axis_result,
                        rotate_filter);
                }
            }
        }
    }
}
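A hypothetical standalone call of this wrapper for a small f32 case; the axis arguments follow the usual NCHW/OIHW convention, and main() plus the concrete shapes and values are made up for illustration:

#include "ngraph/runtime/cpu/kernel/convolution.hpp"

using namespace ngraph;

int main()
{
    // 1x1x3x3 input, 1x1x2x2 filter, unit strides/dilations, zero padding.
    float input[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    float filter[4] = {1, 0, 0, 1};
    float output[4];

    runtime::cpu::kernel::convolution<float>(
        input, filter, output,
        Shape{1, 1, 3, 3},    // data shape (N, C, H, W)
        Shape{1, 1, 2, 2},    // filter shape (O, I, H, W)
        Shape{1, 1, 2, 2},    // result shape
        Strides{1, 1},        // window movement strides
        Strides{1, 1},        // window dilation strides
        CoordinateDiff{0, 0}, // padding below
        CoordinateDiff{0, 0}, // padding above
        Strides{1, 1},        // data dilation strides
        0, 1,                 // data: batch axis, channel axis
        1, 0,                 // filters: input-channel axis, output-channel axis
        0, 1,                 // result: batch axis, channel axis
        false);               // rotate_filter
}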