Commit 7d6a41f3 authored by shssf's avatar shssf Committed by Robert Kimball

IntelGPU backend: Slice operation (#1304)

parent c38c76a7
......@@ -46,6 +46,7 @@
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/util.hpp"
......@@ -186,6 +187,39 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
do_equal_propagation(topology, input_name, output_name);
}
else if ("Slice" == op->description())
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Slice> elem = static_pointer_cast<op::Slice>(op);
const Coordinate& lower_bounds = elem->get_lower_bounds();
const Coordinate& upper_bounds = elem->get_upper_bounds();
const Strides& strides = elem->get_strides();
if (input_shape.empty() || output_shape.empty() || lower_bounds.empty() ||
upper_bounds.empty() || strides.empty())
{
do_equal_propagation(topology, input_name, output_name);
}
else
{
do_slice_operation(topology,
input_name,
input_shape,
output_name,
output_shape,
output_type,
lower_bounds,
upper_bounds,
strides);
}
}
else if ("Add" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
......
......@@ -27,6 +27,7 @@
using namespace std;
using namespace ngraph;
// Kernel argument maps handed to cldnn::custom_gpu_primitive:
// one input tensor + one output tensor, and two inputs + one output.
static vector<cldnn_arg> parameters_1inp_1out = {{arg_input, 0}, {arg_output, 0}};
static vector<cldnn_arg> parameters_2inp_1out = {{arg_input, 0}, {arg_input, 1}, {arg_output, 0}};
static string array_dims(const Shape& dimentions)
......@@ -58,16 +59,22 @@ static string access_dims(const Shape& dimentions, const AxisSet& axis = {})
return buffer;
}
static string
access_dims_strided(const Shape& dimentions, const Shape& pad_below, const Shape& pad_interior)
static string access_dims_strided(const Shape& dimentions,
const Shape& pad_below,
const Shape& pad_interior,
bool is_pad_interior)
{
string buffer;
size_t var_idx = 0;
for (auto const& i : dimentions)
{
buffer += "[i" + to_string(var_idx) + " * (" + to_string(pad_interior.at(var_idx)) +
" + 1) + " + to_string(pad_below.at(var_idx)) + "]";
buffer += "[i" + to_string(var_idx) + " * (" + to_string(pad_interior.at(var_idx));
if (is_pad_interior)
{
buffer += " + 1";
}
buffer += ") + " + to_string(pad_below.at(var_idx)) + "]";
++var_idx;
}
......@@ -130,7 +137,7 @@ void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
++var_idx;
}
writer << "output" << access_dims_strided(input_shape, pad_below, pad_interior)
writer << "output" << access_dims_strided(input_shape, pad_below, pad_interior, true)
<< " = input" << access_dims(input_shape) << ";\n";
// Closing brackets for main Copy loop
......@@ -446,3 +453,53 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
layout);
topology.add(op_dot);
}
// Emits an OpenCL kernel implementing ngraph::op::Slice and registers it as a
// custom GPU primitive in the given topology.
//
// The generated kernel copies a strided sub-tensor of "input" into "output":
// for every output coordinate (i0, i1, ...) it reads
//     input[i0 * strides[0] + lower_bounds[0]][i1 * strides[1] + lower_bounds[1]]...
// upper_bounds is implied by output_shape (the loop extents) and is therefore
// not referenced in the generated code.
void runtime::intelgpu::do_slice_operation(cldnn::topology& topology,
                                           const string& input_name,
                                           const Shape& input_shape,
                                           const string& output_name,
                                           const Shape& output_shape,
                                           const element::Type& output_type,
                                           const Coordinate& lower_bounds,
                                           const Coordinate& upper_bounds,
                                           const Strides& strides)
{
    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    const string entry_point_name = "slice_unknown";
    codegen::CodeWriter writer;

    // NOTE(review): the kernel hard-codes "float" buffers while the layout is
    // built from output_type — non-float element types would be misread.
    // TODO: derive the OpenCL scalar type from output_type.
    writer << "__kernel void " << entry_point_name << "(const __global float input"
           << array_dims(input_shape) << ", __global float output" << array_dims(output_shape)
           << ")\n";

    writer.block_begin();
    {
        // Open one loop per output dimension; the loop extent is the output
        // shape, so upper_bounds never appears explicitly.
        size_t var_idx = 0;
        for (auto const& i : output_shape)
        {
            writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
                   << var_idx << ")\n";
            writer.block_begin();
            ++var_idx;
        }

        // Strided, offset read from the input tensor (is_pad_interior=false:
        // strides are used verbatim, without the pad "+ 1" adjustment).
        writer << "output" << access_dims(output_shape) << " = input"
               << access_dims_strided(input_shape, lower_bounds, strides, false) << ";\n";

        // Close the per-dimension loops (counted loop avoids an unused
        // range-variable warning).
        for (size_t dim = 0; dim < output_shape.size(); ++dim)
        {
            writer.block_end();
        }
    }
    writer.block_end();

    const cldnn::custom_gpu_primitive op_slice(output_name,
                                               {input_name},
                                               {writer.get_code()},
                                               entry_point_name,
                                               parameters_1inp_1out,
                                               "",
                                               layout);
    topology.add(op_slice);
}
......@@ -19,7 +19,9 @@
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/coordinate.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
......@@ -46,6 +48,16 @@ namespace ngraph
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type);
// Generates and registers a custom OpenCL kernel performing the Slice
// operation: output = input[lower_bounds : upper_bounds : strides].
void do_slice_operation(cldnn::topology& topology,
                        const std::string& input_name,
                        const Shape& input_shape,
                        const std::string& output_name,
                        const Shape& output_shape,
                        const element::Type& output_type,
                        const Coordinate& lower_bounds,
                        const Coordinate& upper_bounds,
                        const Strides& strides);
}
}
}
......@@ -49,7 +49,6 @@ backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum
backwards_power
backwards_relu
backwards_replace_slice
backwards_reverse_3d_02
backwards_reverse_sequence_n3_c2_h3
......@@ -65,23 +64,12 @@ backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_sum_v2s
backwards_tan
batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
batch_norm_one_output
batch_norm_three_outputs
broadcast_algo_3d_stride_1
broadcast_algo_3d_stride_2
broadcast_algo_matrix_stride_1
broadcast_algo_matrix_stride_2
broadcast_algo_matrix_stride_3
broadcast_algo_vector_middle
broadcast_matrix_1
broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling
......@@ -149,8 +137,6 @@ floor
function_call
greater
greatereq
kahan_sum_3d_to_vector
kahan_sum_to_scalar
less
lesseq
lesseq_bool
......@@ -209,7 +195,6 @@ pad_exterior_2d_0x0
pad_exterior_2d_0x3
pad_exterior_2d_3x0
pad_interior_exterior_4d_2x0x3x2
power
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
product_3d_to_matrix_most_sig
......@@ -238,8 +223,6 @@ reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
reduce_window_emulating_max_pool_2d_2channel_2image
relu_2Dbackprop
relu_4Dbackprop
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......@@ -271,18 +254,9 @@ select_and_scatter_3d_without_overlap
select_and_scatter_without_overlap
select_and_scatter_with_overlap
sigmoid_bprop_n1c1h4
sigmoid_n1c1h2w2
sigmoid_n1c1h4
sign
sin
sinh
slice_3d
slice_3d_strided
slice_3d_strided_different_strides
slice_matrix
slice_matrix_strided
slice_scalar
slice_vector
softmax_all
softmax_axis
softmax_axis_2
......@@ -290,19 +264,9 @@ softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
sum_3d_eliminate_zero_dim
sum_3d_to_matrix_least_sig
sum_3d_to_matrix_most_sig
sum_3d_to_scalar
sum_3d_to_vector
sum_5d_to_scalar
sum_matrix_cols_zero
sum_matrix_columns
sum_matrix_rows
sum_matrix_rows_zero
sum_matrix_to_scalar_zero_by_zero
sum_to_scalar
sum_trivial
sum_trivial_5d
sum_vector_zero
tan
tensor_constant_int64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment