Commit 578f7d8f authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Convolution support for double and minor code cleanup (#2479)

* IntelGPU backend: Convolution support for double and minor code cleanup

* PR2479: custom kernel selection fix
parent f64b0e0c
@@ -847,7 +847,7 @@ shared_ptr<runtime::Executable>
 {
     do_equal_propagation(topology, get_input_name(op), get_output_name(op));
 }
-else if ((get_output_shape(op).size() <= 4) &&
+else if ((get_output_shape(op).size() <= 4) && (shape_size(get_output_shape(op)) > 0) &&
          ((get_input_type(op) == element::f32) || (get_input_type(op) == element::i32)))
 {
     const size_t shift = 4 - get_output_shape(op).size();
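The new `shape_size(get_output_shape(op)) > 0` clause keeps zero-element tensors off this fast path: in nGraph, `shape_size` is the product of a shape's dimensions, so any zero-length axis makes it zero. A minimal sketch of the amended check, with `shape_size` re-implemented locally for illustration and `fits_reorder_fast_path` a hypothetical name (the real condition additionally requires an f32 or i32 element type, as the context lines show):

```cpp
#include <cstddef>
#include <vector>

using Shape = std::vector<std::size_t>;

// Sketch only: nGraph's shape_size is the product of the dimensions,
// so a shape with any zero-length axis has zero elements overall.
std::size_t shape_size(const Shape& shape)
{
    std::size_t count = 1;
    for (std::size_t dim : shape)
    {
        count *= dim; // one zero dimension zeroes the whole product
    }
    return count;
}

// Mirrors the amended condition: rank at most 4 and a non-empty tensor.
bool fits_reorder_fast_path(const Shape& output_shape)
{
    return (output_shape.size() <= 4) && (shape_size(output_shape) > 0);
}
```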
@@ -1434,7 +1434,8 @@ shared_ptr<runtime::Executable>
 // following are the checks to go with workaround
 if ((win_stride.size() > 2) || (pad_below.size() > 2) || (pad_above.size() > 2) ||
     (win_dilation.size() > 2) || (data_dilation.size() > 2) ||
-    (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1))
+    (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
+    (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 0),
@@ -1509,7 +1510,7 @@ shared_ptr<runtime::Executable>
 if ((win_stride.size() > 2) || (win_stride.at(0) != 1) || (win_stride.at(1) != 1) ||
     (pad_below.size() > 2) || (pad_above.size() > 2) || (data_dilation.size() > 2) ||
     (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
-    (win_dilation.size() > 2))
+    (win_dilation.size() > 2) || (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 0),
@@ -1608,7 +1609,8 @@ shared_ptr<runtime::Executable>
 if ((win_stride.size() > 2) || (win_stride.at(0) != 1) || (win_stride.at(1) != 1) ||
     (pad_below.size() > 2) || (pad_above.size() > 2) || (data_dilation.size() > 2) ||
     (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
-    (win_dilation.size() > 2) || (win_dilation.at(0) != 1) || (win_dilation.at(1) != 1))
+    (win_dilation.size() > 2) || (win_dilation.at(0) != 1) ||
+    (win_dilation.at(1) != 1) || (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 1),
...
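All three hunks above add the same `get_output_type(op) != element::f32` clause: the clDNN convolution primitive path handles only f32, so any other element type (now including double) is routed to the generated OpenCL kernel. A hedged distillation of the first of the three conditions (the other two add analogous unit-stride/unit-dilation checks); `ConvParams`, `ElemType`, and `must_use_custom_kernel` are illustrative names, not the backend's API:

```cpp
#include <cstddef>
#include <vector>

// Illustrative stand-ins for the nGraph/clDNN types used above.
enum class ElemType { f32, f64, i32 };

struct ConvParams
{
    std::vector<std::size_t> win_stride;
    std::vector<std::size_t> pad_below;
    std::vector<std::size_t> pad_above;
    std::vector<std::size_t> win_dilation;
    std::vector<std::size_t> data_dilation;
};

// True when the op must use the generated OpenCL kernel instead of the
// clDNN primitive: more than two spatial dimensions, non-unit data
// dilation, or (after this commit) any element type other than f32.
// Like the original checks, this assumes at least two spatial dims.
bool must_use_custom_kernel(const ConvParams& p, ElemType output_type)
{
    return (p.win_stride.size() > 2) || (p.pad_below.size() > 2) ||
           (p.pad_above.size() > 2) || (p.win_dilation.size() > 2) ||
           (p.data_dilation.size() > 2) || (p.data_dilation.at(0) != 1) ||
           (p.data_dilation.at(1) != 1) || (output_type != ElemType::f32);
}
```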
@@ -109,6 +109,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
                                                  bool reverse_filter)
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string kernel_type_name = get_opencl_type_name(output_type);
     const string entry_point_name = "convolution_" + output_name;
     const Shape input_data(input_shape.cbegin() + 2, input_shape.cend());
     const Shape filter_data(filter_shape.cbegin() + 2, filter_shape.cend());
@@ -116,9 +117,10 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
     codegen::CodeWriter writer;
     vector<size_t> gws;

-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(input_shape) << ", const __global float filter" << array_dims(filter_shape)
-           << ", __global float output" << array_dims(output_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_type_name
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_type_name
+           << " filter" << array_dims(filter_shape) << ", __global " << kernel_type_name
+           << " output" << array_dims(output_shape) << ")\n";
     writer.block_begin();
     { // Main function body
@@ -152,7 +154,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
         ++var_idx;
     }

-    writer << "float result = 0.0f;\n\n"
+    writer << kernel_type_name << " result = 0.0;\n\n"
            << "// Loop over input_channel\n"
            << "for (uint input_channel = 0; input_channel < "
            << input_shape.at(input_channel_axis_data) << "; ++input_channel)\n";
@@ -221,7 +223,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
         writer << ")\n";
         writer.block_begin();
         {
-            writer << "float input_elem = " << input_order
+            writer << kernel_type_name << " input_elem = " << input_order
                    << array_dim(input_data, "input_idx_data_dilation") << ";\n";

             // Output element calculation
...
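The net effect of this file's hunks: the generated convolution kernel's element type is no longer hard-coded to `float` but taken from `get_opencl_type_name(output_type)`, and the accumulator initializer drops the `f` suffix (`0.0` is a valid initializer for both float and double). A hypothetical standalone sketch of the resulting signature, with `[N]`/`[K]`/`[M]` standing in for the per-dimension bounds that `array_dims()` expands:

```cpp
#include <iostream>
#include <string>

// Hypothetical helper mirroring the string assembly above; not the
// backend's actual code.
std::string make_kernel_signature(const std::string& type_name,
                                  const std::string& entry_point)
{
    return "__kernel void " + entry_point + "(const __global " + type_name +
           " input[N], const __global " + type_name + " filter[K], __global " +
           type_name + " output[M])";
}

int main()
{
    // With element::f64 the backend now emits a double-typed kernel.
    std::cout << make_kernel_signature("double", "convolution_output0") << "\n";
}
```

One caveat worth noting: OpenCL kernels that use double also require the cl_khr_fp64 extension to be enabled; presumably `get_opencl_type_name` or the surrounding kernel-compilation path handles that outside this diff.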
@@ -105,149 +105,3 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
                                       {1});
     topology.add(op_all_any);
 }
-
-static void get_custom_func_name(codegen::CodeWriter& writer,
-                                 vector<shared_ptr<Function>>& func,
-                                 const string& func_name,
-                                 const string& type_name)
-{
-    if (func.size() != 1)
-    {
-        throw invalid_argument("IntelGPU Custom_Call operation. Custom function number: " +
-                               to_string(func.size()) + " expected: 1");
-    }
-
-    writer << type_name << " " << func_name << "(const " << type_name << " input0, const "
-           << type_name << " input1)\n";
-    writer.block_begin();
-    {
-        for (shared_ptr<Node> op : func.at(0)->get_ordered_ops())
-        {
-            if ((op->description() != "Parameter") && (op->description() != "Result"))
-            {
-                if (op->description() == "Multiply")
-                {
-                    writer << "return input0 * input1;\n";
-                }
-                else if (op->description() == "Add")
-                {
-                    writer << "return input0 + input1;\n";
-                }
-                else if (op->description() == "Maximum")
-                {
-                    writer << "return max(input0, input1);\n";
-                }
-                else if (op->description() == "Minimum")
-                {
-                    writer << "return min(input0, input1);\n";
-                }
-                else if (op->description() == "And")
-                {
-                    writer << "return input0 && input1;\n";
-                }
-                else if (op->description() == "Or")
-                {
-                    writer << "return input0 || input1;\n";
-                }
-                else if (op->description() == "Equal")
-                {
-                    writer << "return input0 == input1;\n";
-                }
-                else if (op->description() == "NotEqual")
-                {
-                    writer << "return input0 != input1;\n";
-                }
-                else
-                {
-                    writer << "UNIMPLEMENTED_FUNCTION_INTELGPU: " << op->description() << "\n";
-                }
-            }
-        }
-    } // End of function bracket
-    writer.block_end();
-}
-
-void runtime::intelgpu::do_reduce_func_call(cldnn::topology& topology,
-                                            const string& input0_name,
-                                            const Shape& input0_shape,
-                                            const string& input1_name,
-                                            const Shape& input1_shape,
-                                            const string& output_name,
-                                            const Shape& output_shape,
-                                            const element::Type& output_type,
-                                            const AxisSet& axis,
-                                            vector<shared_ptr<Function>>& func)
-{
-    const string entry_point_name = "reduce_func_call_" + output_name;
-    const string aux_point_name = "aux_call_" + output_name;
-    const string kernel_type_name = get_opencl_type_name(output_type);
-    const size_t input_size = shape_size<Shape>(input0_shape);
-    codegen::CodeWriter writer;
-
-    get_custom_func_name(writer, func, aux_point_name, kernel_type_name);
-
-    // The kernel name and parameters
-    gen_func_def(writer,
-                 entry_point_name,
-                 {2, kernel_type_name},
-                 {input0_shape, {1}},
-                 kernel_type_name,
-                 output_shape);
-
-    writer.block_begin();
-    {
-        // Initialization loop
-        size_t var_idx = 0;
-        for (auto const& i : output_shape)
-        {
-            writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
-                   << var_idx << ")\n";
-            writer.block_begin();
-            ++var_idx;
-        }
-
-        writer << "output" << access_dims(output_shape) << " = input1" << access_dims(input1_shape)
-               << ";\n";
-
-        // Closing brackets for initialization loop
-        for (auto const& i : output_shape)
-        {
-            writer.block_end();
-        }
-
-        if (input_size && !input0_shape.empty())
-        {
-            // Main operation loop
-            var_idx = 0;
-            for (auto const& i : input0_shape)
-            {
-                writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
-                       << var_idx << ")\n";
-                writer.block_begin();
-                ++var_idx;
-            }
-
-            writer << "output" << access_dims(input0_shape, "i", axis) << " = " << aux_point_name
-                   << "(output" << access_dims(input0_shape, "i", axis) << ", input0"
-                   << access_dims(input0_shape) << ");\n";
-
-            // Closing brackets for loop
-            for (auto const& i : input0_shape)
-            {
-                writer.block_end();
-            }
-        }
-    } // End of function bracket
-    writer.block_end();
-
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const cldnn::custom_gpu_primitive op_product(output_name,
-                                                 {input0_name, input1_name},
-                                                 {writer.get_code()},
-                                                 entry_point_name,
-                                                 get_kernel_args(2, 1),
-                                                 "",
-                                                 layout,
-                                                 {1});
-    topology.add(op_product);
-}
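This deletion looks like the "minor code cleanup" half of the commit title: `do_reduce_func_call` and its helper `get_custom_func_name` are removed wholesale, along with the header declaration below. For reference, the deleted generator produced a two-phase reduction kernel: an initialization pass copying the init value (`input1`) into every output element, then a pass over `input0` folding each element into its output slot through the synthesized `aux_call_...` combiner. A hedged reconstruction of the kind of source it emitted, for an Add combiner and made-up shapes (reduce a [4][8] float input over axis 1):

```cpp
// Illustrative only: the shapes, names, and combiner below are invented
// to show the pattern, not captured from a real build.
const char* generated_kernel = R"(
float aux_call_output0(const float input0, const float input1)
{
    return input0 + input1;
}

__kernel void reduce_func_call_output0(const __global float input0[4][8],
                                       const __global float input1[1],
                                       __global float output[4])
{
    // Initialization loop: seed every output element with the init value.
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        output[i0] = input1[0];
    }

    // Main operation loop: fold each input element into its output slot.
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        for (uint i1 = 0; i1 < 8; ++i1)
        {
            output[i0] = aux_call_output0(output[i0], input0[i0][i1]);
        }
    }
}
)";
```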
@@ -19,7 +19,6 @@
 #include <CPP/topology.hpp>

 #include "ngraph/axis_set.hpp"
-#include "ngraph/function.hpp"
 #include "ngraph/shape.hpp"

 namespace ngraph
@@ -37,17 +36,6 @@ namespace ngraph
                                const AxisSet& axis,
                                const std::string& operation,
                                const std::string& init_val);
-
-            void do_reduce_func_call(cldnn::topology& topology,
-                                     const std::string& input0_name,
-                                     const Shape& input0_shape,
-                                     const std::string& input1_name,
-                                     const Shape& input1_shape,
-                                     const std::string& output_name,
-                                     const Shape& output_shape,
-                                     const element::Type& output_type,
-                                     const AxisSet& axis,
-                                     std::vector<std::shared_ptr<Function>>& func);
         }
     }
 }