Commit 446cf07b authored by shssf, committed by Robert Kimball

IntelGPU backend: Workaround for unsupported data types (#1572)

parent b21ff63d
@@ -109,15 +109,6 @@ static const element::Type& get_output_type(const shared_ptr<Node>& op, size_t n
     return op->get_outputs().at(num).get_tensor().get_element_type();
 }
 
-static void argument_type_check(const element::Type& type)
-{
-    if (type != element::f32 && type != element::boolean)
-    {
-        throw invalid_argument("Kernel data type \"" + type.c_type_string() +
-                               "\" is not supported.");
-    }
-}
-
 static void do_eltwise_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
                                  cldnn::eltwise_mode mode)
@@ -168,16 +159,13 @@ static void do_logical_operation(cldnn::topology& topology,
                                  const string& operation)
 {
     arguments_check(op, 2, 1);
-    argument_type_check(get_input_type(op, 0));
-    argument_type_check(get_input_type(op, 1));
 
     runtime::intelgpu::do_logic_kernel(topology,
                                        get_input_name(op, 0),
                                        get_input_shape(op, 0),
-                                       get_input_type(op, 0).c_type_string(),
+                                       get_input_type(op, 0),
                                        get_input_name(op, 1),
                                        get_input_shape(op, 1),
-                                       get_input_type(op, 1).c_type_string(),
                                        get_output_name(op),
                                        get_output_shape(op),
                                        get_output_type(op),
@@ -548,6 +536,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
             do_bcast_sum_operation(topology,
                                    get_input_name(op),
                                    get_input_shape(op),
+                                   get_input_type(op),
                                    get_output_name(op),
                                    get_output_shape(op),
                                    get_output_type(op),
@@ -571,6 +560,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
             do_bcast_sum_operation(topology,
                                    get_input_name(op),
                                    get_input_shape(op),
+                                   get_input_type(op),
                                    get_output_name(op),
                                    get_output_shape(op),
                                    get_output_type(op),
......
@@ -123,9 +123,23 @@ cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_offset(const Shape
 cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
     const ngraph::element::Type& element_type, const Shape& element_shape)
 {
-    const cldnn::data_types data_type = get_cldnn_type(element_type);
     const cldnn::format::type format = cldnn::format::bfyx;
-    const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
+    cldnn::data_types data_type;
+    cldnn::tensor tensor;
+
+    // This is a workaround for data types that are not supported by clDNN.
+    // If the type is not supported, it is treated as char*.
+    // For example, "int64_t input[2, 3, 4]" becomes "char input[192]".
+    if ((element_type == ngraph::element::i64) || (element_type == ngraph::element::i32))
+    {
+        data_type = cldnn::data_types::i8;
+        tensor = create_cldnn_tensor({shape_size(element_shape) * element_type.size()});
+    }
+    else
+    {
+        data_type = get_cldnn_type(element_type);
+        tensor = create_cldnn_tensor(element_shape);
+    }
 
     return cldnn::layout(data_type, format, tensor);
 }
......
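The layout change above is the core of the workaround: clDNN has no native i64/i32 layouts, so tensors of those types are described to clDNN as flat i8 (byte) buffers sized to the tensor's total footprint in bytes. Below is a minimal standalone sketch of that size calculation in plain C++; the shape and variable names are illustrative, not the nGraph/clDNN API.

    // Standalone sketch (plain C++, not the nGraph/clDNN API): an unsupported
    // element type is presented to clDNN as a flat byte buffer whose length is
    // the tensor's total size in bytes.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        const std::vector<std::size_t> shape{2, 3, 4}; // e.g. an int64_t tensor of shape {2, 3, 4}
        const std::size_t element_size = 8;            // sizeof(int64_t)

        std::size_t element_count = 1;
        for (const std::size_t dim : shape)
        {
            element_count *= dim; // 2 * 3 * 4 = 24 elements
        }

        // 24 elements * 8 bytes = 192 bytes, i.e. the "char input[192]" from the comment above
        std::cout << "buffer size in bytes: " << element_count * element_size << "\n";
        return 0;
    }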
@@ -31,6 +31,7 @@ using namespace ngraph;
 void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                                                const string& input_name,
                                                const Shape& input_shape,
+                                               const element::Type& input_type,
                                                const string& output_name,
                                                const Shape& output_shape,
                                                const element::Type& output_type,
@@ -42,8 +43,12 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
     codegen::CodeWriter writer;
     vector<size_t> gws;
-    runtime::intelgpu::gen_func_def(
-        writer, function_name, {"float"}, {input_shape}, "float", output_shape);
+    runtime::intelgpu::gen_func_def(writer,
+                                    function_name,
+                                    {get_opencl_type_name(input_type)},
+                                    {input_shape},
+                                    get_opencl_type_name(output_type),
+                                    output_shape);
 
     writer.block_begin();
     {
         if (is_bcast)
@@ -63,7 +68,11 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
             "output" + access_dims(input_shape, "i", axis) + " = result;\n";
 
         // Generate loops related to input order with GWS
-        gws = generate_loops_w_axes(writer, input_shape, true, axis, "float result = 0.0f;\n");
+        gws = generate_loops_w_axes(writer,
+                                    input_shape,
+                                    true,
+                                    axis,
+                                    get_opencl_type_name(output_type) + " result = 0.0f;\n");
 
         writer << "result += input0" << access_dims(input_shape) << ";\n";
......
@@ -33,6 +33,7 @@ namespace ngraph
             void do_bcast_sum_operation(cldnn::topology& topology,
                                         const std::string& input_name,
                                         const Shape& input_shape,
+                                        const element::Type& input_type,
                                         const std::string& output_name,
                                         const Shape& output_shape,
                                         const element::Type& output_type,
......
@@ -26,6 +26,22 @@
 using namespace std;
 using namespace ngraph;
 
+string runtime::intelgpu::get_opencl_type_name(const element::Type& ngraph_type)
+{
+    if (ngraph_type == ngraph::element::i64)
+    {
+        return "long";
+    }
+    else if (ngraph_type == ngraph::element::i32)
+    {
+        return "int";
+    }
+    else
+    {
+        return ngraph_type.c_type_string();
+    }
+}
+
 vector<cldnn_arg> runtime::intelgpu::get_kernel_args(size_t input, size_t output)
 {
     vector<cldnn_arg> result;
@@ -1094,10 +1110,9 @@ void runtime::intelgpu::do_select_operation(cldnn::topology& topology,
 void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
                                         const string& input0_name,
                                         const Shape& input0_shape,
-                                        const string& input0_type,
+                                        const element::Type& input0_type,
                                         const string& input1_name,
                                         const Shape& input1_shape,
-                                        const string& input1_type,
                                         const string& output_name,
                                         const Shape& output_shape,
                                         const element::Type& output_type,
@@ -1110,9 +1125,9 @@ void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
     gen_func_def(writer,
                  entry_point_name,
-                 {2, input0_type},
+                 {2, get_opencl_type_name(input0_type)},
                  {input0_shape, input1_shape},
-                 "char",
+                 get_opencl_type_name(output_type),
                  output_shape);
 
     writer.block_begin();
@@ -1228,9 +1243,9 @@ void runtime::intelgpu::do_one_hot_operation(cldnn::topology& topology,
     gen_func_def(writer,
                  entry_point_name,
-                 {input_type.c_type_string()},
+                 {get_opencl_type_name(input_type)},
                  {input_shape},
-                 output_type.c_type_string(),
+                 get_opencl_type_name(output_type),
                  output_shape);
 
     writer.block_begin();
@@ -1286,8 +1301,8 @@ void runtime::intelgpu::do_convert_operation(cldnn::topology& topology,
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
     const string entry_point_name = "convert_" + output_name;
-    const string& input_type_name = input_type.c_type_string();
-    const string& output_type_name = output_type.c_type_string();
+    const string& input_type_name = get_opencl_type_name(input_type);
+    const string& output_type_name = get_opencl_type_name(output_type);
 
     codegen::CodeWriter writer;
     vector<size_t> gws;
......
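The new get_opencl_type_name() helper exists because the generated kernels are OpenCL C, which spells 64-bit and 32-bit integers as "long" and "int" rather than "int64_t"/"int32_t". A minimal standalone sketch of the same mapping idea follows, assuming the host-side name comes from something like c_type_string(); the function and names below are illustrative, not the nGraph API.

    // Standalone sketch: translate a host-side C++ type name into the spelling
    // expected inside generated OpenCL C kernel source.
    #include <iostream>
    #include <string>

    std::string opencl_type_name(const std::string& cpp_type_name)
    {
        if (cpp_type_name == "int64_t")
        {
            return "long"; // OpenCL C's 64-bit integer type
        }
        if (cpp_type_name == "int32_t")
        {
            return "int"; // OpenCL C's 32-bit integer type
        }
        return cpp_type_name; // "float", "char", etc. are already valid OpenCL C names
    }

    int main()
    {
        // A generated kernel signature can then read, e.g.:
        //   __kernel void op(const __global long input0[4], __global long output[4])
        std::cout << opencl_type_name("int64_t") << "\n"; // prints "long"
        std::cout << opencl_type_name("float") << "\n";   // prints "float"
        return 0;
    }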
@@ -96,12 +96,11 @@ namespace ngraph
                                  const element::Type& output_type);
 
             void do_logic_kernel(cldnn::topology& topology,
-                                 const std::string& inputA_name,
-                                 const Shape& inputA_shape,
-                                 const std::string& inputA_type,
-                                 const std::string& inputB_name,
-                                 const Shape& inputB_shape,
-                                 const std::string& inputB_type,
+                                 const std::string& input0_name,
+                                 const Shape& input0_shape,
+                                 const element::Type& input0_type,
+                                 const std::string& input1_name,
+                                 const Shape& input1_shape,
                                  const std::string& output_name,
                                  const Shape& output_shape,
                                  const element::Type& output_type,
@@ -149,6 +148,7 @@ namespace ngraph
                                  const element::Type& output_type);
 
             // Helper functions used in cldnn::custom_gpu_primitive kernels
+            std::string get_opencl_type_name(const element::Type& ngraph_type);
             std::vector<cldnn_arg> get_kernel_args(size_t input, size_t output);
             std::string array_dims(const Shape& dimentions, const AxisSet& axis = {});
             std::string access_dims(const Shape& dimentions,
......
-abc_int64
-aliased_output
+argmax_trivial
+argmin_trivial
 atan
 avg_pool_2d_2channel_2image_padded_only_above
 avg_pool_3d
 backwards_abs
-backwards_acos
 backwards_atan
-backwards_avgpool_n1_c1_hw2x2
-backwards_avgpool_n1_c1_hw4x4
-backwards_avgpool_n2_c2_hw4x4
 backwards_batch_norm_three_outputs
 backwards_ceiling
 backwards_dot_scalar_tensor
 backwards_dot_tensor3_tensor3
 backwards_dot_tensor_scalar
 backwards_dot_tensor_vector
-backwards_exp
 backwards_floor
 backwards_maxpool_n2_c1_hw5_3x3_str2_max
 backwards_maxpool_n4_c1_hw4_2x2_max
@@ -24,15 +21,11 @@ backwards_reverse_sequence_n4d2c3h2w2
 backwards_sign
 backwards_slice
 backwards_tan
-backwards_tanh
 batch_norm_one_output
 batch_norm_three_outputs
-broadcast_vector_rowwise_int64
 ceiling
 concat_matrix_int64
-constant_multi_use
-convert_int32_bool
-convert_int32_float32
+convolution_outlining
 divide_by_zero_int32
 dot_matrix_vector_int64
 floor
@@ -41,15 +34,6 @@ lrn
 max_pool_3d
 numeric_double_inf
 numeric_double_nan
-one_hot_matrix_0
-one_hot_scalar_0_in_3
-one_hot_scalar_1_in_3
-one_hot_scalar_2_in_3
-one_hot_scalar_oob_in_3
-one_hot_vector_0
-one_hot_vector_1
-one_hot_vector_1_barely_oob
-one_hot_vector_1_far_oob
 reduce_3d_to_vector
 reduce_matrix_cols_zero
 reduce_matrix_columns
@@ -74,16 +58,11 @@ reshape_6d
 reverse_sequence_n2c3h4w2
 reverse_sequence_n4c3h2w2
 reverse_sequence_n4d2c3h2w2
-scalar_constant_int64
 select_and_scatter_3d_without_overlap
-select_and_scatter_without_overlap
 select_and_scatter_with_overlap
+select_and_scatter_without_overlap
 sign
 tan
-tensor_2constant
-tensor_constant_int64
-validate_call_input_type
-validate_call_output_type
 zero_sized_abs
 zero_sized_acos
 zero_sized_add
......