IntelGPU backend: Use custom eltwise kernel for signed integers (#1716)

fd80d8ee · shssf · Robert Kimball · 8d70e2a3 · fd80d8ee · fd80d8ee
Commit fd80d8ee, authored Sep 28, 2018 by shssf; committed by Robert Kimball, Sep 28, 2018
4 changed files
--- a/src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+++ b/src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
@@ -161,9 +161,41 @@ static void do_eltwise_operation(cldnn::topology& topology,
 {
    arguments_check(op, 2, 1);

-    const cldnn::eltwise op_add(
-        get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
-    topology.add(op_add);
+    if ((get_input_type(op) == element::i32 || get_input_type(op) == element::i64) &&
+        (mode == cldnn::eltwise_mode::min || mode == cldnn::eltwise_mode::max))
+    {
+        string custom_op;
+
+        if (mode == cldnn::eltwise_mode::min)
+        {
+            custom_op = "min";
+        }
+        else if (mode == cldnn::eltwise_mode::max)
+        {
+            custom_op = "max";
+        }
+        else
+        {
+            custom_op = "not_implemented_operation";
+        }
+
+        runtime::intelgpu::do_eltwise_kernel(topology,
+                                             get_input_name(op, 0),
+                                             get_input_shape(op, 0),
+                                             get_input_type(op, 0),
+                                             get_input_name(op, 1),
+                                             get_input_shape(op, 1),
+                                             get_output_name(op),
+                                             get_output_shape(op),
+                                             get_output_type(op),
+                                             custom_op);
+    }
+    else
+    {
+        const cldnn::eltwise op_add(
+            get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
+        topology.add(op_add);
+    }
 }

 static void do_unary_operation(cldnn::topology& topology,

--- a/src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
+++ b/src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
@@ -1170,6 +1170,53 @@ void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
    topology.add(op_logical);
 }

+void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology, // Generates a two-input element-wise custom kernel and adds it to `topology`.
+                                          const string& input0_name, // Listed first in the primitive's inputs.
+                                          const Shape& input0_shape, // Passed to gen_func_def and used to index input0 in the generated statement.
+                                          const element::Type& input0_type, // Converted with get_opencl_type_name for the input declarations.
+                                          const string& input1_name, // Listed second in the primitive's inputs.
+                                          const Shape& input1_shape, // Passed to gen_func_def and used to index input1 in the generated statement.
+                                          const string& output_name, // Name of the added primitive; also appended to "eltwise_" to form the entry point.
+                                          const Shape& output_shape, // Used for the layout, the generated loops and the output indexing.
+                                          const element::Type& output_type, // Used for the layout and converted with get_opencl_type_name.
+                                          const string& operation) // Written verbatim as the callee in the generated statement, e.g. "min" or "max".
+{
+    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string entry_point_name = "eltwise_" + output_name;
+    codegen::CodeWriter writer;
+    vector<size_t> gws; // Assigned from generate_loops below and handed to the primitive (presumably global work sizes - TODO confirm).
+
+    gen_func_def(writer,
+                 entry_point_name,
+                 {2, get_opencl_type_name(input0_type)}, // NOTE(review): both inputs appear to use input0's type; there is no input1_type parameter.
+                 {input0_shape, input1_shape},
+                 get_opencl_type_name(output_type),
+                 output_shape);
+
+    writer.block_begin();
+    {
+        // Open the main loops over output_shape; the returned value is kept in gws
+        gws = generate_loops(writer, output_shape, true);
+
+        writer << "output" << access_dims(output_shape) << " = " << operation << "(input0"
+               << access_dims(input0_shape) << ", input1" << access_dims(input1_shape) << ");\n"; // One statement: output<idx> = operation(input0<idx>, input1<idx>);
+
+        // Closing brackets for main loops (same helper, last argument false; return value unused)
+        generate_loops(writer, output_shape, false);
+    }
+    writer.block_end();
+
+    const cldnn::custom_gpu_primitive op_logical(output_name, // NOTE(review): local name looks carried over from do_logic_kernel; this is the eltwise primitive.
+                                                 {input0_name, input1_name},
+                                                 {writer.get_code()},
+                                                 entry_point_name,
+                                                 get_kernel_args(2, 1), // Presumably 2 inputs and 1 output - TODO confirm against get_kernel_args.
+                                                 "",
+                                                 layout,
+                                                 gws);
+    topology.add(op_logical);
+}
+
 void runtime::intelgpu::do_reverse_operation(cldnn::topology& topology,
                                             const string& input_name,
                                             const Shape& input_shape,

--- a/src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
+++ b/src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
@@ -106,6 +106,17 @@ namespace ngraph
                                 const element::Type& output_type,
                                 const std::string& operation);

+            void do_eltwise_kernel(cldnn::topology& topology, // Adds a generated two-input element-wise kernel to `topology` (defined in intelgpu_op_custom_kernels.cpp).
+                                   const std::string& input0_name,
+                                   const Shape& input0_shape,
+                                   const element::Type& input0_type, // The only input type parameter; there is no separate type for input1.
+                                   const std::string& input1_name,
+                                   const Shape& input1_shape,
+                                   const std::string& output_name,
+                                   const Shape& output_shape,
+                                   const element::Type& output_type,
+                                   const std::string& operation); // Name emitted as the callee in the generated code, e.g. "min" or "max".
+
            void do_reverse_operation(cldnn::topology& topology,
                                      const std::string& input_name,
                                      const Shape& input_shape,

--- a/test/backend_test.in.cpp
+++ b/test/backend_test.in.cpp
@@ -1650,6 +1650,87 @@ NGRAPH_TEST(${BACKEND_NAME}, minimum)
    EXPECT_EQ((vector<float>{1, 2, -8, 8, -.5, 0, 1, 1}), read_vector<float>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, minimum_int32) // Element-wise Minimum of two i32 tensors, compared with hand-written expected values.
+{
+    Shape shape{2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::i32, shape);
+    auto B = make_shared<op::Parameter>(element::i32, shape);
+    auto f = make_shared<Function>(make_shared<op::Minimum>(A, B), op::ParameterVector{A, B});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Backend tensors: a and b hold the 8 input values each, result receives the output
+    auto a = backend->create_tensor(element::i32, shape);
+    copy_data(a, vector<int32_t>{1, 8, -8, 17, -5, 67635216, 2, 1}); // Includes negative values and one large positive value.
+    auto b = backend->create_tensor(element::i32, shape);
+    copy_data(b, vector<int32_t>{1, 2, 4, 8, 0, 18448, 1, 6}); // First element equals a's, covering the tie case.
+    auto result = backend->create_tensor(element::i32, shape);
+
+    backend->call_with_validate(f, {result}, {a, b});
+    EXPECT_EQ((vector<int32_t>{1, 2, -8, 8, -5, 18448, 1, 1}), read_vector<int32_t>(result)); // Per-element minimum of a and b.
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, minimum_int64) // Element-wise Minimum of two i64 tensors, compared with hand-written expected values.
+{
+    Shape shape{2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::i64, shape);
+    auto B = make_shared<op::Parameter>(element::i64, shape);
+    auto f = make_shared<Function>(make_shared<op::Minimum>(A, B), op::ParameterVector{A, B});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Backend tensors: a and b hold the 8 input values each, result receives the output
+    auto a = backend->create_tensor(element::i64, shape);
+    copy_data(a, vector<int64_t>{1, 8, -8, 17, -5, 67635216, 2, 17179887632}); // Last value does not fit in 32 bits; its low 32 bits are 18448, so truncation would change the minimum.
+    auto b = backend->create_tensor(element::i64, shape);
+    copy_data(b, vector<int64_t>{1, 2, 4, 8, 0, 18448, 1, 280592});
+    auto result = backend->create_tensor(element::i64, shape);
+
+    backend->call_with_validate(f, {result}, {a, b});
+    EXPECT_EQ((vector<int64_t>{1, 2, -8, 8, -5, 18448, 1, 280592}), read_vector<int64_t>(result)); // Per-element minimum of a and b.
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, maximum_int32) // Element-wise Maximum of two i32 tensors, compared with hand-written expected values.
+{
+    Shape shape{2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::i32, shape);
+    auto B = make_shared<op::Parameter>(element::i32, shape);
+    auto f = make_shared<Function>(make_shared<op::Maximum>(A, B), op::ParameterVector{A, B});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Backend tensors: a and b hold the 8 input values each, result receives the output
+    auto a = backend->create_tensor(element::i32, shape);
+    copy_data(a, vector<int32_t>{1, 8, -8, 17, -5, 67635216, 2, 1}); // Includes negative values and one large positive value.
+    auto b = backend->create_tensor(element::i32, shape);
+    copy_data(b, vector<int32_t>{1, 2, 4, 8, 0, 18448, 1, 6}); // First element equals a's, covering the tie case.
+    auto result = backend->create_tensor(element::i32, shape);
+
+    backend->call_with_validate(f, {result}, {a, b});
+    EXPECT_EQ((vector<int32_t>{1, 8, 4, 17, 0, 67635216, 2, 6}), read_vector<int32_t>(result)); // Per-element maximum of a and b.
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, maximum_int64) // Element-wise Maximum of two i64 tensors, compared with hand-written expected values.
+{
+    Shape shape{2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::i64, shape);
+    auto B = make_shared<op::Parameter>(element::i64, shape);
+    auto f = make_shared<Function>(make_shared<op::Maximum>(A, B), op::ParameterVector{A, B});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Backend tensors: a and b hold the 8 input values each, result receives the output
+    auto a = backend->create_tensor(element::i64, shape);
+    copy_data(a, vector<int64_t>{1, 8, -8, 17, -5, 67635216, 2, 17179887632}); // Last value does not fit in 32 bits; its low 32 bits are 18448, so truncation would change the maximum.
+    auto b = backend->create_tensor(element::i64, shape);
+    copy_data(b, vector<int64_t>{1, 2, 4, 8, 0, 18448, 1, 280592});
+    auto result = backend->create_tensor(element::i64, shape);
+
+    backend->call_with_validate(f, {result}, {a, b});
+    EXPECT_EQ((vector<int64_t>{1, 8, 4, 17, 0, 67635216, 2, 17179887632}), // Per-element maximum of a and b.
+              read_vector<int64_t>(result));
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, negative)
 {
    Shape shape{2, 3};