CPU Direct Execution: Implement Slice (#1356)

a2ba381d · Jaikrishnan Menon · Scott Cyphers · ec45be4b · a2ba381d · a2ba381d
Commit a2ba381d authored Aug 08, 2018 by Jaikrishnan Menon Committed by Scott Cyphers Aug 08, 2018
Showing with 196 additions and 0 deletions

CMakeLists.txt src/ngraph/runtime/cpu/CMakeLists.txt +1 -0

slice.cpp src/ngraph/runtime/cpu/builder/slice.cpp +104 -0

slice.hpp src/ngraph/runtime/cpu/kernel/slice.hpp +91 -0

No files found.
--- a/src/ngraph/runtime/cpu/CMakeLists.txt
+++ b/src/ngraph/runtime/cpu/CMakeLists.txt
@@ -52,6 +52,7 @@ set(SRC
    builder/select.cpp
    builder/select_and_scatter.cpp
    builder/sigmoid.cpp
+    builder/slice.cpp
    builder/softmax.cpp
    builder/sum.cpp
    kernel/eigen_thread_pool.cpp

--- a/src/ngraph/runtime/cpu/builder/slice.cpp
+++ b/src/ngraph/runtime/cpu/builder/slice.cpp
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstring>
+
+#include "ngraph/op/slice.hpp"
+#include "ngraph/runtime/cpu/cpu_builder.hpp"
+#include "ngraph/runtime/cpu/kernel/slice.hpp"
+#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
+#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            // Direct-execution builder for ngraph::op::Slice.
+            //
+            // Picks a CPU kernel by element type and input rank, then appends a functor
+            // that invokes it to the external function's functor list. A slice whose
+            // strides are all 1 uses kernel::slice; any other stride selects
+            // kernel::strided_slice.
+            template <>
+            void Builder::BUILDER_DECL(ngraph::op::Slice)
+            {
+                auto& functor_list = external_function->get_functors();
+                auto& tensor_map = external_function->get_tensor_data();
+
+                // The functors hold these two entries by reference; shapes, bounds and
+                // strides are copied into the closure instead.
+                auto& src_tensor = tensor_map[args[0].get_name()];
+                auto& dst_tensor = tensor_map[out[0].get_name()];
+
+                const auto* slice_op = static_cast<const ngraph::op::Slice*>(node);
+
+                auto src_shape = args[0].get_shape();
+                auto dst_shape = out[0].get_shape();
+
+                auto step = slice_op->get_strides();
+                auto lower = slice_op->get_lower_bounds();
+                auto upper = slice_op->get_upper_bounds();
+
+                // Stop scanning at the first axis whose stride differs from 1.
+                bool unit_strides = true;
+                for (size_t axis = 0; unit_strides && axis < step.size(); ++axis)
+                {
+                    unit_strides = (step[axis] == 1);
+                }
+
+                if (unit_strides)
+                {
+                    // Unit-stride case: only the lower bounds and output shape are needed.
+                    std::function<decltype(runtime::cpu::kernel::slice<float, 2>)> kernel;
+
+                    SELECT_KERNEL_BY_RANK(kernel,
+                                          args[0].get_element_type(),
+                                          src_shape.size(),
+                                          runtime::cpu::kernel::slice);
+
+                    functor_list.emplace_back(
+                        [&src_tensor, &dst_tensor, kernel, src_shape, dst_shape, lower](
+                            CPURuntimeContext* ctx) {
+                            kernel(src_tensor, dst_tensor, src_shape, dst_shape, lower);
+                        });
+                    return;
+                }
+
+                // At least one stride differs from 1: use the strided kernel.
+                std::function<decltype(runtime::cpu::kernel::strided_slice<float, 2>)> kernel;
+
+                SELECT_KERNEL_BY_RANK(kernel,
+                                      args[0].get_element_type(),
+                                      src_shape.size(),
+                                      runtime::cpu::kernel::strided_slice);
+
+                functor_list.emplace_back(
+                    [&src_tensor, &dst_tensor, kernel, src_shape, dst_shape, lower, upper, step](
+                        CPURuntimeContext* ctx) {
+                        kernel(src_tensor, dst_tensor, src_shape, dst_shape, lower, upper, step);
+                    });
+            }
+
+            REGISTER_OP_BUILDER(Slice);
+        }
+    }
+}
--- a/src/ngraph/runtime/cpu/kernel/slice.hpp
+++ b/src/ngraph/runtime/cpu/kernel/slice.hpp
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#define EIGEN_USE_THREADS
+#include <unsupported/Eigen/CXX11/Tensor>
+
+#include "ngraph/coordinate.hpp"
+#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
+#include "ngraph/shape.hpp"
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace kernel
+            {
+                /// Copies a unit-stride sub-block of `input` into `output`.
+                ///
+                /// Both buffers are wrapped as row-major Eigen tensor maps of rank `Rank`
+                /// with element type `ElementType`, and the assignment is evaluated on
+                /// eigen::global_thread_pool_device.
+                ///
+                /// \param input        Source buffer, viewed with `input_shape`.
+                /// \param output       Destination buffer, viewed with `output_shape`.
+                /// \param input_shape  Extents of the source tensor.
+                /// \param output_shape Extents of the destination tensor; also used as the
+                ///                     extent of the slice taken from the source.
+                /// \param lower_bounds Per-axis starting offset of the slice in the source.
+                ///
+                /// The first `Rank` entries of each shape/coordinate argument are read, so
+                /// each must hold at least `Rank` elements.
+                template <typename ElementType, unsigned int Rank>
+                void slice(void* input,
+                           void* output,
+                           const Shape& input_shape,
+                           const Shape& output_shape,
+                           const Coordinate& lower_bounds)
+                {
+                    Eigen::array<Eigen::Index, Rank> out_dims, in_dims;
+                    Eigen::array<Eigen::Index, Rank> indices;
+
+                    // Index with Rank's own type to avoid a signed/unsigned comparison,
+                    // and make the conversion to Eigen::Index explicit.
+                    for (unsigned int i = 0; i < Rank; ++i)
+                    {
+                        out_dims[i] = static_cast<Eigen::Index>(output_shape[i]);
+                        in_dims[i] = static_cast<Eigen::Index>(input_shape[i]);
+                        indices[i] = static_cast<Eigen::Index>(lower_bounds[i]);
+                    }
+
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
+                        static_cast<ElementType*>(output), out_dims);
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
+                        static_cast<ElementType*>(input), in_dims);
+
+                    out.device(eigen::global_thread_pool_device) = in.slice(indices, out_dims);
+                }
+
+                /// Copies a strided sub-block of `input` into `output`.
+                ///
+                /// Both buffers are wrapped as row-major Eigen tensor maps of rank `Rank`
+                /// with element type `ElementType`. The selection is expressed with Eigen's
+                /// stridedSlice(start, stop, strides) and evaluated on
+                /// eigen::global_thread_pool_device.
+                ///
+                /// \param input         Source buffer, viewed with `input_shape`.
+                /// \param output        Destination buffer, viewed with `output_shape`.
+                /// \param input_shape   Extents of the source tensor.
+                /// \param output_shape  Extents of the destination tensor.
+                /// \param lower_bounds  Per-axis start index passed to stridedSlice.
+                /// \param upper_bounds  Per-axis stop index passed to stridedSlice.
+                /// \param slice_strides Per-axis step passed to stridedSlice.
+                ///
+                /// The first `Rank` entries of each shape/coordinate/stride argument are
+                /// read, so each must hold at least `Rank` elements.
+                /// NOTE(review): `output_shape` is assumed to match the extent that
+                /// stridedSlice produces for these bounds and strides - confirm with caller.
+                template <typename ElementType, unsigned int Rank>
+                void strided_slice(void* input,
+                                   void* output,
+                                   const Shape& input_shape,
+                                   const Shape& output_shape,
+                                   const Coordinate& lower_bounds,
+                                   const Coordinate& upper_bounds,
+                                   const Strides& slice_strides)
+                {
+                    Eigen::array<Eigen::Index, Rank> out_dims, in_dims;
+                    Eigen::array<Eigen::Index, Rank> start_indices, stop_indices, strides;
+
+                    // Index with Rank's own type to avoid a signed/unsigned comparison,
+                    // and make the conversion to Eigen::Index explicit.
+                    for (unsigned int i = 0; i < Rank; ++i)
+                    {
+                        out_dims[i] = static_cast<Eigen::Index>(output_shape[i]);
+                        in_dims[i] = static_cast<Eigen::Index>(input_shape[i]);
+                        start_indices[i] = static_cast<Eigen::Index>(lower_bounds[i]);
+                        stop_indices[i] = static_cast<Eigen::Index>(upper_bounds[i]);
+                        strides[i] = static_cast<Eigen::Index>(slice_strides[i]);
+                    }
+
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
+                        static_cast<ElementType*>(output), out_dims);
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
+                        static_cast<ElementType*>(input), in_dims);
+
+                    out.device(eigen::global_thread_pool_device) =
+                        in.stridedSlice(start_indices, stop_indices, strides);
+                }
+            }
+        }
+    }
+}