Commit 780ef08c authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Eltwise and unary operation all types support (#2460)

parent afcc4ca8
@@ -170,30 +170,15 @@ static const element::Type& get_output_type(const shared_ptr<Node>& op, size_t n
 static void do_eltwise_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
+                                 const string& custom_op,
+                                 bool function_operation,
                                  cldnn::eltwise_mode mode)
 {
     arguments_check(op, 2, 1);

-    // Leave it here for some time
-#if USE_INTELGPU_CUSTOM_KERNELS
-    if ((get_input_type(op) == element::i32 || get_input_type(op) == element::i64) &&
-        (mode == cldnn::eltwise_mode::min || mode == cldnn::eltwise_mode::max))
+    if (get_input_type(op) != element::f32 || get_input_type(op, 1) != element::f32 ||
+        get_output_type(op) != element::f32)
     {
-        string custom_op;
-
-        if (mode == cldnn::eltwise_mode::min)
-        {
-            custom_op = "min";
-        }
-        else if (mode == cldnn::eltwise_mode::max)
-        {
-            custom_op = "max";
-        }
-        else
-        {
-            custom_op = "not_implemented_operation";
-        }
-
         runtime::intelgpu::do_eltwise_kernel(topology,
                                              get_input_name(op, 0),
                                              get_input_shape(op, 0),
@@ -203,23 +188,18 @@ static void do_eltwise_operation(cldnn::topology& topology,
                                              get_output_name(op),
                                              get_output_shape(op),
                                              get_output_type(op),
-                                             custom_op);
+                                             custom_op,
+                                             function_operation);
     }
     else
     {
-        const cldnn::eltwise op_add(
-            get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
-        topology.add(op_add);
-    }
-#else
-    const cldnn::eltwise op_eltwise(
-        get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
-    topology.add(op_eltwise);
-#endif
+        const cldnn::eltwise op_eltwise(
+            get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
+        topology.add(op_eltwise);
+    }
 }

-static void do_unary_operation(cldnn::topology& topology,
+static void do_cldnn_unary(cldnn::topology& topology,
                            const shared_ptr<Node>& op,
                            cldnn_activation_func mode,
                            const cldnn_activation_additional_params& param = {0.f, 0.f})
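Note on the hunk above (not part of the diff): binary eltwise ops now use the clDNN eltwise primitive only when both inputs and the output are f32; every other type combination falls through to do_eltwise_kernel, which generates an OpenCL kernel. The custom_op string is either an infix operator ("+", "-", "*", "/") or a built-in function name ("min", "max", "pow"), distinguished by function_operation. As an illustrative sketch only, with a hypothetical output name "out" and a serial loop standing in for whatever gen_func_def/generate_loops actually emit, the generated kernel for an i64 Add of shape {4} would look roughly like:

    // Illustrative sketch, not produced verbatim by the diff.
    __kernel void eltwise_out(const __global long input0[4],
                              const __global long input1[4],
                              __global long output[4])
    {
        for (uint i0 = 0; i0 < 4; ++i0)
        {
            // infix form: function_operation == false, custom_op == "+"
            output[i0] = (input0[i0] + input1[i0]); // long long
        }
    }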
@@ -230,6 +210,39 @@ static void do_unary_operation(cldnn::topology& topology,
     topology.add(cldnn_unary);
 }

+static void
+    do_custom_unary(cldnn::topology& topology, const shared_ptr<Node>& op, const string& operation)
+{
+    arguments_check(op, 1, 1);
+
+    runtime::intelgpu::do_custom_unary_operation(topology,
+                                                 get_input_name(op),
+                                                 get_input_shape(op),
+                                                 get_input_type(op),
+                                                 get_output_name(op),
+                                                 get_output_shape(op),
+                                                 get_output_type(op),
+                                                 operation);
+}
+
+static void do_universal_unary(cldnn::topology& topology,
+                               const shared_ptr<Node>& op,
+                               const string& operation,
+                               cldnn_activation_func mode,
+                               const cldnn_activation_additional_params& param = {0.f, 0.f})
+{
+    arguments_check(op, 1, 1);
+
+    if (get_input_type(op) != element::f32)
+    {
+        do_custom_unary(topology, op, operation);
+    }
+    else
+    {
+        do_cldnn_unary(topology, op, mode, param);
+    }
+}
+
 static void do_pooling_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
                                  const Shape& pool_shape,
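Taken together, the three helpers split unary handling by element type: do_cldnn_unary wraps the clDNN activation primitive (which the diff treats as f32-only), do_custom_unary always generates an OpenCL kernel, and do_universal_unary routes between them by input type. An illustrative trace, not part of the diff:

    // f32 Tanh: keep the clDNN activation primitive
    do_universal_unary(topology, op, "tanh", activation_hyperbolic_tan);
    //   -> do_cldnn_unary(topology, op, activation_hyperbolic_tan, param)
    // i32/i64/f64 Tanh: generated OpenCL kernel calling the "tanh" built-in
    //   -> do_custom_unary(topology, op, "tanh")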
@@ -608,7 +621,8 @@ shared_ptr<runtime::Executable>

             // clDNN has limited support for Softmax operation
             // following are the checks to go with custom kernel
-            if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)))
+            if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
+                (get_input_type(op) != element::f32))
             {
                 do_softmax_operation(topology,
                                      get_input_name(op),
@@ -644,27 +658,37 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Add:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
+            do_eltwise_operation(topology, op, "+", false, cldnn::eltwise_mode::sum);
+            break;
+        }
+        case OP_TYPEID::Subtract:
+        {
+            do_eltwise_operation(topology, op, "-", false, cldnn::eltwise_mode::sub);
             break;
         }
         case OP_TYPEID::Multiply:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
+            do_eltwise_operation(topology, op, "*", false, cldnn::eltwise_mode::prod);
             break;
         }
         case OP_TYPEID::Divide:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::div);
+            do_eltwise_operation(topology, op, "/", false, cldnn::eltwise_mode::div);
             break;
         }
         case OP_TYPEID::Maximum:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::max);
+            do_eltwise_operation(topology, op, "max", true, cldnn::eltwise_mode::max);
             break;
         }
         case OP_TYPEID::Minimum:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::min);
+            do_eltwise_operation(topology, op, "min", true, cldnn::eltwise_mode::min);
+            break;
+        }
+        case OP_TYPEID::Power:
+        {
+            do_eltwise_operation(topology, op, "pow", true, cldnn::eltwise_mode::pow);
             break;
         }
         case OP_TYPEID::Constant:
@@ -945,29 +969,6 @@ shared_ptr<runtime::Executable>
             }
             break;
         }
-        case OP_TYPEID::Negative:
-        {
-            if (get_input_type(op) == ngraph::element::i32)
-            {
-                // This is workaround to enable GNMT in training mode.
-                // clDNN doesn't support i32 data type for activation primitive.
-                // Exception from clDNN: implementation_map for N5cldnn10activationE
-                // could not find any implementation to match key
-                do_negative_operation(topology,
-                                      get_input_name(op),
-                                      get_input_shape(op),
-                                      get_input_type(op),
-                                      get_output_name(op),
-                                      get_output_shape(op),
-                                      get_output_type(op));
-            }
-            else
-            {
-                const cldnn_activation_additional_params param = {-1.f, 0.f};
-                do_unary_operation(topology, op, activation_linear, param);
-            }
-            break;
-        }
         case OP_TYPEID::All:
         {
             arguments_check(op, 1, 1);
@@ -1014,7 +1015,12 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Relu:
         {
-            do_unary_operation(topology, op, activation_relu);
+            do_cldnn_unary(topology, op, activation_relu);
+            break;
+        }
+        case OP_TYPEID::Sigmoid:
+        {
+            do_cldnn_unary(topology, op, activation_logistic);
             break;
         }
         case OP_TYPEID::ReluBackprop:
@@ -1032,62 +1038,88 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Abs:
         {
-            do_unary_operation(topology, op, activation_abs);
+            do_universal_unary(topology, op, "fabs", activation_abs);
             break;
         }
         case OP_TYPEID::Sqrt:
         {
-            do_unary_operation(topology, op, activation_sqrt);
+            do_universal_unary(topology, op, "sqrt", activation_sqrt);
             break;
         }
         case OP_TYPEID::Tanh:
         {
-            do_unary_operation(topology, op, activation_hyperbolic_tan);
+            do_universal_unary(topology, op, "tanh", activation_hyperbolic_tan);
             break;
         }
         case OP_TYPEID::Sin:
         {
-            do_unary_operation(topology, op, activation_sin);
+            do_universal_unary(topology, op, "sin", activation_sin);
             break;
         }
         case OP_TYPEID::Asin:
         {
-            do_unary_operation(topology, op, activation_asin);
+            do_universal_unary(topology, op, "asin", activation_asin);
             break;
         }
         case OP_TYPEID::Sinh:
         {
-            do_unary_operation(topology, op, activation_sinh);
+            do_universal_unary(topology, op, "sinh", activation_sinh);
             break;
         }
         case OP_TYPEID::Cos:
         {
-            do_unary_operation(topology, op, activation_cos);
+            do_universal_unary(topology, op, "cos", activation_cos);
             break;
         }
        case OP_TYPEID::Acos:
         {
-            do_unary_operation(topology, op, activation_acos);
+            do_universal_unary(topology, op, "acos", activation_acos);
             break;
         }
         case OP_TYPEID::Cosh:
         {
-            do_unary_operation(topology, op, activation_cosh);
+            do_universal_unary(topology, op, "cosh", activation_cosh);
             break;
         }
         case OP_TYPEID::Log:
         {
-            do_unary_operation(topology, op, activation_log);
+            do_universal_unary(topology, op, "log", activation_log);
             break;
         }
         case OP_TYPEID::Exp:
         {
-            do_unary_operation(topology, op, activation_exp);
+            do_universal_unary(topology, op, "exp", activation_exp);
             break;
         }
-        case OP_TYPEID::Sigmoid:
+        case OP_TYPEID::Negative:
+        {
+            const cldnn_activation_additional_params param = {-1.f, 0.f};
+            do_universal_unary(topology, op, "-", activation_linear, param);
+            break;
+        }
+        case OP_TYPEID::Atan:
+        {
+            do_custom_unary(topology, op, "atan");
+            break;
+        }
+        case OP_TYPEID::Ceiling:
+        {
+            do_custom_unary(topology, op, "ceil");
+            break;
+        }
+        case OP_TYPEID::Floor:
+        {
+            do_custom_unary(topology, op, "floor");
+            break;
+        }
+        case OP_TYPEID::Sign:
+        {
+            do_custom_unary(topology, op, "sign");
+            break;
+        }
+        case OP_TYPEID::Tan:
         {
-            do_unary_operation(topology, op, activation_logistic);
+            do_custom_unary(topology, op, "tan");
             break;
         }
         case OP_TYPEID::SigmoidBackprop:
@@ -1156,81 +1188,6 @@ shared_ptr<runtime::Executable>
             do_logical_operation(topology, op, " || ");
             break;
         }
-        case OP_TYPEID::Subtract:
-        {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::sub);
-            break;
-        }
-        case OP_TYPEID::Power:
-        {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::pow);
-            break;
-        }
-        case OP_TYPEID::Atan:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Atan);
-            break;
-        }
-        case OP_TYPEID::Ceiling:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Ceil);
-            break;
-        }
-        case OP_TYPEID::Floor:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Floor);
-            break;
-        }
-        case OP_TYPEID::Sign:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Sign);
-            break;
-        }
-        case OP_TYPEID::Tan:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Tan);
-            break;
-        }
         case OP_TYPEID::Pad:
         {
             arguments_check(op, 2, 1);
...
@@ -1012,7 +1012,8 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
                                           const string& output_name,
                                           const Shape& output_shape,
                                           const element::Type& output_type,
-                                          const string& operation)
+                                          const string& operation,
+                                          bool function_operation)
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
     const string entry_point_name = "eltwise_" + output_name;
@@ -1031,15 +1032,34 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
         // Main loops
         gws = generate_loops(writer, output_shape, true);

-        writer << "output" << access_dims(output_shape) << " = " << operation << "(input0"
-               << access_dims(input0_shape) << ", input1" << access_dims(input1_shape) << ");\n";
+        writer << "output" << access_dims(output_shape) << " = ";
+
+        if (function_operation)
+        {
+            string explicit_conversion;
+            // TODO need better workaround for this built_in
+            if (operation == "pow")
+            {
+                explicit_conversion = "convert_double";
+            }
+
+            writer << operation << "(" << explicit_conversion << "(input0"
+                   << access_dims(input0_shape) << "), " << explicit_conversion << "(input1"
+                   << access_dims(input1_shape) << "));";
+        }
+        else
+        {
+            writer << "(input0" << access_dims(input0_shape) << " " << operation << " input1"
+                   << access_dims(input1_shape) << ");";
+        }
+
+        writer << " // " << get_opencl_type_name(input0_type) << " "
+               << get_opencl_type_name(output_type) << "\n";

         // Closing brackets for main loops
         generate_loops(writer, output_shape, false);
     }
     writer.block_end();

-    const cldnn::custom_gpu_primitive op_logical(output_name,
+    const cldnn::custom_gpu_primitive op_eltwise(output_name,
                                                  {input0_name, input1_name},
                                                  {writer.get_code()},
                                                  entry_point_name,
@@ -1047,7 +1067,7 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
                                                  "",
                                                  layout,
                                                  gws);
-    topology.add(op_logical);
+    topology.add(op_eltwise);
 }

 void runtime::intelgpu::do_reverse_operation(cldnn::topology& topology,
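For reference, the statement emitted inside the generated loops now takes one of two forms, each followed by a type comment. The convert_double detour exists presumably because OpenCL's pow built-in is defined only for floating-point arguments, so integer operands must be converted before the call. Illustrative sketch (index names depend on the shapes):

    // function_operation == true, operation == "pow", i32 inputs:
    output[i0] = pow(convert_double(input0[i0]), convert_double(input1[i0])); // int int
    // function_operation == false, operation == "/":
    output[i0] = (input0[i0] / input1[i0]); // int int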
@@ -1268,16 +1288,17 @@ void runtime::intelgpu::do_sigmoid_backprop_operation(cldnn::topology& topology,
     topology.add(op_sigmoid_backprop);
 }

-void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
-                                                    const string& input_name,
-                                                    const Shape& input_shape,
-                                                    const element::Type& input_type,
-                                                    const string& output_name,
-                                                    const Shape& output_shape,
-                                                    const element::Type& output_type,
-                                                    const CUSTOM_ELTWISE operation_name)
+void runtime::intelgpu::do_custom_unary_operation(cldnn::topology& topology,
+                                                  const string& input_name,
+                                                  const Shape& input_shape,
+                                                  const element::Type& input_type,
+                                                  const string& output_name,
+                                                  const Shape& output_shape,
+                                                  const element::Type& output_type,
+                                                  const string& operation_name)
 {
-    const string entry_point_name = "op_custom_eltwise_" + output_name;
+    const string entry_point_name = "op_custom_unary_" + output_name;
+    const string intermidiate_type = input_type.size() < 8 ? "float" : "double";
     codegen::CodeWriter writer;
     vector<size_t> gws;
@@ -1290,39 +1311,19 @@ void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
     writer.block_begin();
     {
         gws = generate_loops(writer, output_shape, true);
-        writer << "output" << access_dims(output_shape) << " = ";

-        switch (operation_name)
-        {
-        case CUSTOM_ELTWISE::Atan:
-        {
-            writer << "atan";
-            break;
-        }
-        case CUSTOM_ELTWISE::Ceil:
-        {
-            writer << "ceil";
-            break;
-        }
-        case CUSTOM_ELTWISE::Floor:
-        {
-            if (input_type.is_real())
-            {
-                writer << "floor";
-            }
-            break;
-        }
-        case CUSTOM_ELTWISE::Sign:
-        {
-            writer << "sign";
-            break;
-        }
-        case CUSTOM_ELTWISE::Tan:
-        {
-            writer << "tan";
-            break;
-        }
-        }
-        writer << "(input0" << access_dims(input_shape) << ");\n";
+        // convert to intermediate floating point type
+        writer << intermidiate_type << " input_var = convert_" << intermidiate_type << "(input0"
+               << access_dims(input_shape) << ");\n";
+
+        // do the operation with the same type
+        writer << intermidiate_type << " output_var = " << operation_name
+               << "(input_var); //Type: " << get_opencl_type_name(input_type) << "\n";
+
+        // convert to destination type
+        writer << "output" << access_dims(output_shape) << " = convert_"
+               << get_opencl_type_name(output_type) << "(output_var);\n";

         generate_loops(writer, output_shape, false);
     }
     writer.block_end();
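The rewritten generator routes every input through a floating-point intermediate: element types narrower than 8 bytes use float, 8-byte types (i64, f64) use double, and the result is converted back to the output type. This also retires the old per-op switch, where Floor on a non-real input emitted no operation at all and silently degenerated to a copy. Roughly what the emitted loop body looks like for an i32 Sign (illustrative only):

    float input_var = convert_float(input0[i0]);
    float output_var = sign(input_var); //Type: int
    output[i0] = convert_int(output_var);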
@@ -1419,46 +1420,6 @@ void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
     topology.add(op_arg_max_min);
 }

-void runtime::intelgpu::do_negative_operation(cldnn::topology& topology,
-                                              const string& input_name,
-                                              const Shape& input_shape,
-                                              const element::Type& input_type,
-                                              const string& output_name,
-                                              const Shape& output_shape,
-                                              const element::Type& output_type)
-{
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const string entry_point_name = "negative_" + output_name;
-    const string& input_type_name = get_opencl_type_name(input_type);
-    const string& output_type_name = get_opencl_type_name(output_type);
-    codegen::CodeWriter writer;
-    vector<size_t> gws;
-
-    gen_func_def(
-        writer, entry_point_name, {input_type_name}, {input_shape}, output_type_name, output_shape);
-
-    writer.block_begin();
-    {
-        gws = generate_loops(writer, output_shape, true);
-
-        writer << "output" << access_dims(output_shape) << " = - (input0"
-               << access_dims(input_shape) << ");\n";
-
-        generate_loops(writer, output_shape, false);
-    }
-    writer.block_end();
-
-    const cldnn::custom_gpu_primitive op_negative(output_name,
-                                                  {input_name},
-                                                  {writer.get_code()},
-                                                  entry_point_name,
-                                                  get_kernel_args(1, 1),
-                                                  "",
-                                                  layout,
-                                                  gws);
-    topology.add(op_negative);
-}
-
 void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
                                              const string& input_name,
                                              const Shape& input_shape,
...
@@ -119,7 +119,8 @@ namespace ngraph
                                    const std::string& output_name,
                                    const Shape& output_shape,
                                    const element::Type& output_type,
-                                   const std::string& operation);
+                                   const std::string& operation,
+                                   bool function_operation);

             void do_reverse_operation(cldnn::topology& topology,
                                       const std::string& input_name,
@@ -162,23 +163,14 @@ namespace ngraph
                                       const Shape& output_shape,
                                       const element::Type& output_type);

-            enum class CUSTOM_ELTWISE
-            {
-                Atan,
-                Ceil,
-                Floor,
-                Sign,
-                Tan
-            };
-
-            void do_custom_eltwise_operation(cldnn::topology& topology,
+            void do_custom_unary_operation(cldnn::topology& topology,
                                            const std::string& input_name,
                                            const Shape& input_shape,
                                            const element::Type& input_type,
                                            const std::string& output_name,
                                            const Shape& output_shape,
                                            const element::Type& output_type,
-                                             const CUSTOM_ELTWISE operation_name);
+                                           const std::string& operation_name);

             void do_arg_max_min_operation(cldnn::topology& topology,
                                           const std::string& input_name,
@@ -190,14 +182,6 @@ namespace ngraph
                                           const size_t reduction_axis,
                                           const bool is_max);

-            void do_negative_operation(cldnn::topology& topology,
-                                       const std::string& input_name,
-                                       const Shape& input_shape,
-                                       const element::Type& input_type,
-                                       const std::string& output_name,
-                                       const Shape& output_shape,
-                                       const element::Type& output_type);
-
             void do_reshape_operation(cldnn::topology& topology,
                                       const std::string& input_name,
                                       const Shape& input_shape,
...
@@ -65,9 +65,10 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
     codegen::CodeWriter writer1;
     vector<size_t> gws;

-    writer0 << "__kernel void " << entry_point_middle_name << "(const __global float input"
-            << array_dims(input_shape) << ", __global float output" << array_dims(input_shape, axes)
-            << ")\n";
+    writer0 << "__kernel void " << entry_point_middle_name << "(const __global "
+            << get_opencl_type_name(input_type) << " input" << array_dims(input_shape)
+            << ", __global " << get_opencl_type_name(output_type) << " output"
+            << array_dims(input_shape, axes) << ")\n";

     writer0.block_begin();
     {
@@ -90,10 +91,11 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
                                                        gws);
     topology.add(op_softmax_middle);

-    writer1 << "__kernel void " << entry_point_name << "(const __global float input0"
-            << array_dims(input_shape) << ", const __global float input1"
-            << array_dims(input_shape, axes) << ", __global float output"
-            << array_dims(output_shape) << ")\n";
+    writer1 << "__kernel void " << entry_point_name << "(const __global "
+            << get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
+            << ", const __global " << get_opencl_type_name(input_type) << " input1"
+            << array_dims(input_shape, axes) << ", __global " << get_opencl_type_name(output_type)
+            << " output" << array_dims(output_shape) << ")\n";

     writer1.block_begin();
     {
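With the hardcoded float replaced by get_opencl_type_name(...), the generated softmax kernels now match the tensor's element type. Illustratively, for an f64 input of shape {2, 3, 4} reduced over axis 2, the first stage's signature would become something like this (the entry-point name here is hypothetical):

    __kernel void softmax_middle_out(const __global double input[2][3][4],
                                     __global double output[2][3])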
...
@@ -13,9 +13,6 @@ backwards_reverse_sequence_n3_c2_h3
 backwards_reverse_sequence_n4d2c3h2w2
 backwards_slice
 batch_norm_bprop_n4c3h2w2
-batch_norm_inference_0eps_f64
-batch_norm_inference_f64
-batch_norm_training_0eps_f64
 dequantize
 dequantize_axes
 dequantize_dynamic_offset
@@ -64,7 +61,6 @@ shape_of_5d
 shape_of_matrix
 shape_of_scalar
 shape_of_vector
-softmax_axis_3d_double
 topk_1d_max_all
 topk_1d_max_one
 topk_1d_max_partial
@@ -88,26 +84,3 @@ topk_3d_min_partial
 topk_3d_single_output
 topk_5d_max_partial
 topk_int64
-zero_sized_abs
-zero_sized_acos
-zero_sized_add
-zero_sized_asin
-zero_sized_atan
-zero_sized_ceiling
-zero_sized_cos
-zero_sized_cosh
-zero_sized_divide
-zero_sized_exp
-zero_sized_log
-zero_sized_maximum
-zero_sized_minimum
-zero_sized_multiply
-zero_sized_negative
-zero_sized_power
-zero_sized_sign
-zero_sized_sin
-zero_sized_sinh
-zero_sized_sqrt
-zero_sized_subtract
-zero_sized_tan
-zero_sized_tanh