Commit 35a4a32f authored by Anna Alberska, committed by Robert Kimball

IntelGPU backend: ArgMax, ArgMin, LRN operations + Add Quantize op into IntelGPU (#1697)

* add quantize op into intelgpu

* IntelGPU backend: ArgMax, ArgMin, LRN operations

* PR1697. Comments addressed

* PR1697. Empty lines added
parent 7506133f
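
For context, a minimal sketch (not part of this commit) of the nGraph nodes the IntelGPU backend can now lower. The op constructor signatures and parameter order are assumptions inferred from the headers included and the accessors used below (get_reduction_axis, get_alpha, get_beta, get_bias, get_nsize), not copied from this change.

// Sketch only: example ArgMax/ArgMin/LRN nodes, assuming the op constructor
// signatures of the nGraph version this change targets.
#include <memory>
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

void build_example_nodes()
{
    // 2x3 f32 input; reduce along axis 1 and report indices as i32
    // (the i32/i64 index path goes through the custom OpenCL kernel added below).
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto arg_max = std::make_shared<op::ArgMax>(data, 1, element::i32);
    auto arg_min = std::make_shared<op::ArgMin>(data, 1, element::i32);

    // Across-channel LRN on an NCHW tensor: alpha, beta, bias, window size
    // (parameter order assumed; it mirrors the get_alpha/get_beta/get_bias/
    // get_nsize accessors used by the backend code in this commit).
    auto image = std::make_shared<op::Parameter>(element::f32, Shape{1, 4, 5, 5});
    auto lrn = std::make_shared<op::LRN>(image, 0.0001, 0.75, 1.0, 3);

    (void)arg_max;
    (void)arg_min;
    (void)lrn;
}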
@@ -16,6 +16,7 @@
#include <CPP/activation.hpp>
#include <CPP/activation_grad.hpp>
#include <CPP/arg_max_min.hpp>
#include <CPP/batch_norm.hpp>
#include <CPP/broadcast.hpp>
#include <CPP/concatenation.hpp>
@@ -24,6 +25,7 @@
#include <CPP/eltwise.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/layout.hpp>
#include <CPP/lrn.hpp>
#include <CPP/permute.hpp>
#include <CPP/pooling.hpp>
#include <CPP/reorder.hpp>
@@ -49,6 +51,8 @@
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
@@ -57,6 +61,7 @@
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/min.hpp"
@@ -1232,11 +1237,85 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
one_hot_axis);
break;
}
case OP_TYPEID::ArgMax:
{
arguments_check(op, 1, 1);
const shared_ptr<op::ArgMax> arg_max_op = static_pointer_cast<op::ArgMax>(op);
const size_t reduction_axis = arg_max_op->get_reduction_axis();
const element::Type& index_elem_type = arg_max_op->get_element_type();
if (index_elem_type == element::i64 || index_elem_type == element::i32)
{
do_arg_max_min_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
reduction_axis,
true);
}
else
{
cldnn::arg_max_min::axis_name axis =
reduction_axis == 0 ? cldnn::arg_max_min::y : cldnn::arg_max_min::x;
const cldnn::arg_max_min arg_max_min(
get_output_name(op), get_input_name(op), cldnn::arg_max_min::max, 1, axis);
topology.add(arg_max_min);
}
break;
}
case OP_TYPEID::ArgMin:
{
arguments_check(op, 1, 1);
const shared_ptr<op::ArgMin> arg_min_op = static_pointer_cast<op::ArgMin>(op);
const size_t reduction_axis = arg_min_op->get_reduction_axis();
const element::Type& index_elem_type = arg_min_op->get_element_type();
if (index_elem_type == element::i64 || index_elem_type == element::i32)
{
do_arg_max_min_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
reduction_axis,
false);
}
else
{
cldnn::arg_max_min::axis_name axis =
reduction_axis == 0 ? cldnn::arg_max_min::y : cldnn::arg_max_min::x;
const cldnn::arg_max_min arg_max_min(
get_output_name(op), get_input_name(op), cldnn::arg_max_min::min, 1, axis);
topology.add(arg_max_min);
}
break;
}
case OP_TYPEID::LRN:
{
arguments_check(op, 1, 1);
const shared_ptr<op::LRN> lrn_op = static_pointer_cast<op::LRN>(op);
const cldnn::lrn lrn(get_output_name(op),
get_input_name(op),
lrn_op->get_nsize(),
lrn_op->get_bias(),
lrn_op->get_alpha(),
lrn_op->get_beta(),
cldnn_lrn_norm_region_across_channel);
topology.add(lrn);
break;
}
case OP_TYPEID::AllReduce:
case OP_TYPEID::FunctionCall:
case OP_TYPEID::Quantize:
case OP_TYPEID::Reduce:
case OP_TYPEID::ReduceWindow:
case OP_TYPEID::ReplaceSlice:
@@ -1258,7 +1258,7 @@ void runtime::intelgpu::do_one_hot_operation(cldnn::topology& topology,
size_t current_input = 0;
string buffer;
const size_t output_shape_size = output_shape.size();
for (uint j = 0; j < output_shape_size; ++j)
{
if (j == one_hot_axis)
{
@@ -1439,3 +1439,84 @@ void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
gws);
topology.add(op_custom_eltwise);
}

void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const size_t reduction_axis,
const bool is_max)
{
const string operation_name = is_max ? "max" : "min";
const string entry_point_name = "op_arg_" + operation_name + "_" + output_name;
codegen::CodeWriter writer;
vector<size_t> gws;
const string operation_sign = is_max ? " > " : " < ";
const string infinity = is_max ? "-INFINITY" : "INFINITY";
const string var_name = operation_name + "_val";
size_t current_input = 0;
string dims_buffer;
const size_t input_shape_size = input_shape.size();
for (uint j = 0; j < input_shape_size; ++j)
{
if (j == reduction_axis)
{
dims_buffer += "[i]";
}
else
{
dims_buffer += "[i" + to_string(current_input) + "]";
++current_input;
}
}
gen_func_def(writer,
entry_point_name,
{get_opencl_type_name(input_type)},
{input_shape},
get_opencl_type_name(output_type),
output_shape);
writer.block_begin();
{
gws = generate_loops(writer, output_shape, true);
writer << get_opencl_type_name(output_type) << " " << var_name << " = " << infinity
<< ";\n";
writer << "uint index = -1;\n";
writer << "for (uint i = 0; i < " << input_shape.at(reduction_axis) << "; ++i)\n";
writer.block_begin();
{
writer << "if(i == 0 || input0" << dims_buffer << operation_sign << var_name << ")\n";
writer.block_begin();
{
writer << var_name << " = input0" << dims_buffer << ";\n";
writer << "index = i;\n";
}
writer.block_end();
}
writer.block_end();
writer << "output" << access_dims(output_shape) << " = index;\n";
generate_loops(writer, output_shape, false);
}
writer.block_end();
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_arg_max_min(output_name,
{input_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
gws);
topology.add(op_arg_max_min);
}
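
For illustration, below is roughly the OpenCL C that do_arg_max_min_operation would emit for an f32 input of shape {2, 3} reduced along axis 1 into an i32 output of shape {2}, with is_max = true. The kernel name, signature, and outer-loop form depend on gen_func_def and generate_loops, which are not part of this diff, so they are approximated; only the inner loop body mirrors the writer calls above, and the output name is a placeholder.

// Sketch only: approximate kernel text produced by the generator above for
// input_shape {2, 3}, reduction_axis = 1, is_max = true, i32 output.
// gen_func_def/generate_loops (not shown in this diff) determine the real
// signature and outer-loop form; the entry point and buffer names are placeholders.
const char* const example_argmax_kernel = R"(
__kernel void op_arg_max_output0(const __global float input0[2][3],
                                 __global int output[2])
{
    for (uint i0 = 0; i0 < 2; ++i0) // generate_loops may map this to work-items
    {
        int max_val = -INFINITY; // the generator declares this with the output element type
        uint index = -1;         // wraps to UINT_MAX until the first assignment below
        for (uint i = 0; i < 3; ++i)
        {
            if (i == 0 || input0[i0][i] > max_val)
            {
                max_val = input0[i0][i];
                index = i;
            }
        }
        output[i0] = index;
    }
}
)";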
@@ -165,6 +165,16 @@ namespace ngraph
const element::Type& output_type,
const CUSTOM_ELTWISE operation_name);
void do_arg_max_min_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const size_t reduction_axis,
const bool is_max);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::string get_opencl_type_name(const element::Type& ngraph_type);
std::vector<cldnn_arg> get_kernel_args(size_t input, size_t output);
argmax_trivial
argmin_trivial
avg_pool_2d_2channel_2image_padded_only_above
avg_pool_3d
backwards_batch_norm_three_outputs
@@ -19,10 +17,13 @@ batch_norm_three_outputs
divide_by_zero_int32
dot_matrix_vector_int64
function_call
lrn
max_pool_3d
numeric_double_inf
numeric_double_nan
quantize
quantize_axes
quantize_int8
quantize_clamp
reduce_3d_to_vector
reduce_matrix_cols_zero
reduce_matrix_columns