Commit 4fd8228d authored by Sergey Shalnov, committed by Scott Cyphers

IntelGPU backend: Reshape operation 5D support implemented (#2433)

parent b78321c6
@@ -931,35 +931,51 @@ shared_ptr<runtime::Executable>
arguments_check(op, 1, 1);
const shared_ptr<op::Reshape> op_reshape = static_pointer_cast<op::Reshape>(op);
const AxisVector& reshape_axes = op_reshape->get_input_order();
if (op_reshape->get_is_transpose())
if ((get_input_type(op) != element::f32) || (get_input_shape(op).size() > 4) ||
(get_output_shape(op).size() > 4))
{
vector<uint16_t> permute_order({0, 1, 2, 3}); // No action by default
const AxisVector& reshape_axes = op_reshape->get_input_order();
const size_t max_dim = 4;
const size_t scale =
reshape_axes.size() < max_dim ? max_dim - reshape_axes.size() : 0;
// Need to scale indices up according to the array rank.
// For example, in a 2D array the indices are 0,1, but in a 4D array they should be 2,3
// because cldnn::tensor is always 4D, assuming the cldnn::bfyx model
size_t rindex = max_dim;
for (auto i = reshape_axes.crbegin(); i != reshape_axes.crend() && rindex > 0;
++i, --rindex)
{
permute_order.at(rindex - 1) = *i + scale;
}
const cldnn::permute cldnn_permute(
get_output_name(op), get_input_name(op), permute_order);
topology.add(cldnn_permute);
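// Shapes above 4D or types other than f32 cannot be expressed with the cldnn
// primitives (cldnn::tensor is 4D), so fall back to a generated OpenCL kernel.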
do_reshape_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
reshape_axes);
}
else
{
const cldnn::tensor new_shape =
intelgpu_space::create_cldnn_tensor(get_output_shape(op));
const cldnn::reshape reshape_op(get_output_name(op), get_input_name(op), new_shape);
topology.add(reshape_op);
if (op_reshape->get_is_transpose())
{
vector<uint16_t> permute_order({0, 1, 2, 3}); // No action by default
const size_t max_dim = 4;
const size_t scale =
reshape_axes.size() < max_dim ? max_dim - reshape_axes.size() : 0;
// Need to scale indices up according to the array rank.
// For example, in a 2D array the indices are 0,1, but in a 4D array they should be 2,3
// because cldnn::tensor is always 4D, assuming the cldnn::bfyx model
size_t rindex = max_dim;
for (auto i = reshape_axes.crbegin(); i != reshape_axes.crend() && rindex > 0;
++i, --rindex)
{
permute_order.at(rindex - 1) = *i + scale;
}
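// Illustrative example (not part of the original code): for a 2D input with
// reshape_axes {1, 0}, scale == 2 and the loop above yields
// permute_order {0, 1, 3, 2}, i.e. the two lowest cldnn dimensions are swapped.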
const cldnn::permute cldnn_permute(
get_output_name(op), get_input_name(op), permute_order);
topology.add(cldnn_permute);
}
else
{
const cldnn::tensor new_shape =
intelgpu_space::create_cldnn_tensor(get_output_shape(op));
const cldnn::reshape reshape_op(
get_output_name(op), get_input_name(op), new_shape);
topology.add(reshape_op);
}
}
break;
}
@@ -1453,3 +1453,63 @@ void runtime::intelgpu::do_negative_operation(cldnn::topology& topology,
gws);
topology.add(op_negative);
}
void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisVector& reshape_axes)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "reshape_" + output_name;
const string& input_type_name = get_opencl_type_name(input_type);
const string& output_type_name = get_opencl_type_name(output_type);
const size_t dst_shape_size = shape_size(output_shape);
codegen::CodeWriter writer;
gen_func_def(writer,
entry_point_name,
{input_type_name},
{input_shape},
output_type_name,
{dst_shape_size});
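// The kernel output is declared as a flat buffer of shape_size(output_shape)
// elements; it is filled linearly by the loop nest generated below.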
writer.block_begin();
{
writer << "// input: " << input_shape << "\n";
writer << "//output: " << output_shape << "\n";
writer << "//axes: " << reshape_axes << "\n\n";
writer << "uint output_it = 0;\n";
// Main operation loop
for (auto const i : reshape_axes)
{
writer << "for (uint i" << i << " = 0; i" << i << " < " << input_shape.at(i) << "; ++i"
<< i << ")\n";
writer.block_begin();
}
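// The loops are nested in reshape_axes order, so the input is traversed in the
// transposed order while the output index simply increments.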
writer << "output[output_it] = input0" << access_dims(input_shape) << ";\n"
<< "++output_it;\n";
// Closing brackets for the loops
for (auto const i : reshape_axes)
{
writer.block_end();
}
}
writer.block_end();
const cldnn::custom_gpu_primitive op_reshape(output_name,
{input_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
{1});
topology.add(op_reshape);
}
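For illustration only (this sketch is not part of the commit): for a hypothetical 2x3 input reshaped with reshape_axes {1, 0}, and assuming access_dims expands to the usual input0[i0][i1] indexing, the loop nest generated above would look roughly like:
uint output_it = 0;
for (uint i1 = 0; i1 < 3; ++i1)
{
    for (uint i0 = 0; i0 < 2; ++i0)
    {
        output[output_it] = input0[i0][i1];
        ++output_it;
    }
}
The output buffer is the flat dst_shape_size == 6 element array, so the write order realizes the transpose.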
@@ -21,6 +21,7 @@
#include "ngraph/runtime/intelgpu/code_writer.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
@@ -195,6 +196,15 @@ namespace ngraph
const Shape& output_shape,
const element::Type& output_type);
void do_reshape_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisVector& reshape_axes);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::string get_opencl_type_name(const element::Type& ngraph_type);
std::string get_opencl_type_min_max_value(const element::Type& ngraph_type,
@@ -61,7 +61,6 @@ replace_slice_matrix
replace_slice_matrix_inplace
replace_slice_scalar
replace_slice_vector
reshape_6d
reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2
@@ -256,18 +256,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_m2m_dim_change_transpose)
NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_021)
{
vector<float> a_data(2 * 3 * 4);
for (int i = 0; i < 2 * 3 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 3, 4};
Shape shape_r{2, 4, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{0, 2, 1}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -284,18 +281,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_021)
NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_210)
{
vector<float> a_data(2 * 3 * 4);
for (int i = 0; i < 2 * 3 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 3, 4};
Shape shape_r{4, 3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{2, 1, 0}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -312,18 +306,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_210)
NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_201)
{
vector<float> a_data(2 * 3 * 4);
for (int i = 0; i < 2 * 3 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 3, 4};
Shape shape_r{4, 2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{2, 0, 1}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -340,18 +331,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_201)
NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_102)
{
vector<float> a_data(2 * 3 * 4);
for (int i = 0; i < 2 * 3 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 3, 4};
Shape shape_r{3, 2, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{1, 0, 2}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -368,18 +356,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_102)
NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_120)
{
vector<float> a_data(2 * 3 * 4);
for (int i = 0; i < 2 * 3 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 3, 4};
Shape shape_r{3, 4, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{1, 2, 0}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -396,18 +381,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_3d_transpose_120)
NGRAPH_TEST(${BACKEND_NAME}, reshape_4d_transpose)
{
vector<float> a_data(2 * 2 * 5 * 5);
for (int i = 0; i < 2 * 2 * 5 * 5; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 2, 5, 5};
Shape shape_r{2, 5, 5, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{0, 2, 3, 1}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -430,18 +412,15 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_4d_transpose)
NGRAPH_TEST(${BACKEND_NAME}, reshape_4d_no_transpose)
{
vector<float> a_data(2 * 2 * 5 * 5);
for (int i = 0; i < 2 * 2 * 5 * 5; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 2, 5, 5};
Shape shape_r{2, 5, 5, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto r = make_shared<op::Reshape>(A, AxisVector{0, 1, 2, 3}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
@@ -517,16 +496,13 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_transposed_shape_change)
//
NGRAPH_TEST(${BACKEND_NAME}, reshape_6d)
{
vector<float> a_data(2 * 2 * 3 * 3 * 2 * 4);
for (int i = 0; i < 2 * 2 * 3 * 3 * 2 * 4; i++)
{
a_data[i] = float(i + 1);
}
Shape shape_a{2, 2, 3, 3, 2, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 2, 2, 4, 3, 2};
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
auto r = make_shared<op::Reshape>(A, AxisVector{2, 4, 0, 5, 3, 1}, shape_r);
auto f = make_shared<Function>(r, ParameterVector{A});
@@ -565,3 +541,37 @@ NGRAPH_TEST(${BACKEND_NAME}, reshape_6d)
215., 287., 200., 272., 208., 280., 216., 288.}),
read_vector<float>(result));
}
#if NGRAPH_INTERPRETER_ENABLE
NGRAPH_TEST(${BACKEND_NAME}, reshape_shufflenet_5d)
{
Shape shape_a{1, 112, 56, 56};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{1, 4, 28, 56, 56};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_c{1, 28, 4, 56, 56};
auto C = make_shared<op::Parameter>(element::f32, shape_c);
Shape shape_r{1, 112, 56, 56};
vector<float> a_data(shape_size(shape_a));
iota(a_data.begin(), a_data.end(), 1);
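// The graph below reshapes the 4D input to 5D (shape_b), transposes axes 1 and 2
// of the 5D tensor (shape_c), then reshapes back to 4D (shape_r); the backend
// result is compared against the INTERPRETER reference.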
auto r0 = make_shared<op::Reshape>(A, AxisVector{0, 1, 2, 3}, shape_b);
auto r1 = make_shared<op::Reshape>(r0, AxisVector{0, 2, 1, 3, 4}, shape_c);
auto r2 = make_shared<op::Reshape>(r1, AxisVector{0, 1, 2, 3, 4}, shape_r);
auto f = make_shared<Function>(r2, ParameterVector{A});
auto ref_func = clone_function(*f);
auto bk_func = clone_function(*f);
vector<vector<float>> args;
args.push_back(a_data);
auto ref_results = execute(ref_func, args, "INTERPRETER");
auto bk_results = execute(bk_func, args, "${BACKEND_NAME}");
EXPECT_TRUE(test::all_close_f(ref_results.at(0), bk_results.at(0)));
}
#endif //NGRAPH_INTERPRETER_ENABLE