Commit b86eca4d authored by Anna Alberska, committed by Robert Kimball

IntelGPU backend: TopK operation (#2736)

* add top_k operation

* modify topk definition

* Update intelgpu_backend.cpp

* Update intelgpu_op_custom_kernels.cpp

* add topk to graph visualization

* enable index_element_type in graph visualization

* minor changes

parent 5d74b489
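
For context: nGraph's op::TopK carries the four attributes the backend reads below (top_k_axis, index_element_type, k, compute_max) and has two outputs, indices and values. A minimal usage sketch, assuming the v0 constructor takes the arguments in this order:

    // Hypothetical example: pick the 2 largest values along axis 1 of a
    // 2x3 float tensor; indices are reported as i32.
    auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto topk = make_shared<op::TopK>(data, 1, element::i32, 2, true);
    // Output 0 carries the indices, output 1 the values, which is why the
    // backend change below generates one kernel per output.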
@@ -92,6 +92,7 @@
 #include "ngraph/op/slice.hpp"
 #include "ngraph/op/softmax.hpp"
 #include "ngraph/op/sum.hpp"
+#include "ngraph/op/topk.hpp"
 #include "ngraph/parameter_vector.hpp"
 #include "ngraph/util.hpp"
@@ -2021,6 +2022,44 @@ shared_ptr<runtime::Executable>
             topology.add(lrn);
             break;
         }
+        case OP_TYPEID::TopK:
+        {
+            arguments_check(op, 1, 2);
+
+            const shared_ptr<op::TopK> topk_op = static_pointer_cast<op::TopK>(op);
+            const size_t top_k_axis = topk_op->get_top_k_axis();
+            const element::Type& index_elem_type = topk_op->get_index_element_type();
+            const size_t k = topk_op->get_k();
+            const bool compute_max = topk_op->get_compute_max();
+
+            // Output 0 receives the indices of the selected elements.
+            do_topk_operation(topology,
+                              op->get_input_tensor_name(0),
+                              op->get_input_shape(0),
+                              op->get_input_element_type(0),
+                              op->get_output_tensor_name(0),
+                              op->get_output_shape(0),
+                              op->get_output_element_type(0),
+                              index_elem_type,
+                              top_k_axis,
+                              k,
+                              compute_max,
+                              true);
+
+            // Output 1 receives the selected values themselves.
+            do_topk_operation(topology,
+                              op->get_input_tensor_name(0),
+                              op->get_input_shape(0),
+                              op->get_input_element_type(0),
+                              op->get_output_tensor_name(1),
+                              op->get_output_shape(1),
+                              op->get_output_element_type(1),
+                              index_elem_type,
+                              top_k_axis,
+                              k,
+                              compute_max,
+                              false);
+            break;
+        }
         case OP_TYPEID::AllReduce:
         case OP_TYPEID::BroadcastDistributed:
         case OP_TYPEID::BroadcastLike:
@@ -2041,7 +2080,6 @@ shared_ptr<runtime::Executable>
         case OP_TYPEID::ScalarConstantLike:
         case OP_TYPEID::ShapeOf:
         case OP_TYPEID::StopGradient:
-        case OP_TYPEID::TopK:
         case OP_TYPEID::Transpose:
         case OP_TYPEID::EmbeddingLookup:
         case OP_TYPEID::DynBroadcast:
......
@@ -2168,6 +2168,152 @@ void runtime::intelgpu::do_dequantize_operation(cldnn::topology& topology,
     topology.add(op_dequantize);
 }
 
+void runtime::intelgpu::do_topk_operation(cldnn::topology& topology,
+                                          const std::string& input_name,
+                                          const Shape& input_shape,
+                                          const element::Type& input_type,
+                                          const std::string& output_name,
+                                          const Shape& output_shape,
+                                          const element::Type& output_type,
+                                          const element::Type& index_elem_type,
+                                          const size_t top_k_axis,
+                                          const size_t k,
+                                          const bool compute_max,
+                                          const bool find_indices)
+{
+    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string entry_point_name = "topk_" + output_name;
+    CodeWriter writer;
+    const string operation_sign = compute_max ? " > " : " < ";
+    const string prev_operation_sign = !compute_max ? ">" : "<";
+    const size_t shape_size = input_shape.size();
+
+    gen_func_def(writer,
+                 entry_point_name,
+                 {get_opencl_type_name(input_type)},
+                 {input_shape},
+                 get_opencl_type_name(output_type),
+                 output_shape);
+
+    writer.block_begin();
+    {
+        // State tracked across the k selection passes: the previously selected
+        // value/index and the best candidate of the current pass.
+        writer << get_opencl_type_name(input_type)
+               << " prev_min_max = " << get_opencl_type_min_max_value(input_type, !compute_max)
+               << ";\n";
+        writer << get_opencl_type_name(index_elem_type) << " prev_index = -2;\n";
+        writer << get_opencl_type_name(input_type)
+               << " current_min_max = " << get_opencl_type_min_max_value(input_type, compute_max)
+               << ";\n";
+        writer << get_opencl_type_name(index_elem_type) << " current_index = -1;\n";
+
+        // Emit one loop per output dimension except the top-k axis.
+        size_t current_output = 0;
+        for (auto const& i : output_shape)
+        {
+            if (current_output != top_k_axis)
+            {
+                writer << "for (uint i" << current_output << " = 0; i" << current_output << " < "
+                       << i << "; ++i" << current_output << ")\n";
+                writer.block_begin();
+            }
+            ++current_output;
+        }
+
+        writer << "prev_min_max = " << get_opencl_type_min_max_value(input_type, !compute_max)
+               << ";\n";
+        writer << "prev_index = -2;\n";
+
+        // Select the k extreme elements one at a time.
+        writer << "for (uint i = 0; i < " << output_shape.at(top_k_axis) << "; ++i)\n";
+        writer.block_begin();
+        writer << "current_min_max = " << get_opencl_type_min_max_value(input_type, compute_max)
+               << ";\n";
+        writer << "current_index = -1;\n";
+        writer << "for (uint j = 0; j < " << input_shape.at(top_k_axis) << "; ++j)\n";
+        writer.block_begin();
+
+        // Build the input access expression, e.g. [i0][j][i2] for top_k_axis 1.
+        size_t current = 0;
+        string buffer;
+        for (size_t j = 0; j < shape_size; ++j)
+        {
+            if (j == top_k_axis)
+            {
+                buffer += "[j]";
+            }
+            else
+            {
+                buffer += "[i" + to_string(current) + "]";
+            }
+            ++current;
+        }
+
+        // A candidate must beat the current best while being strictly below
+        // (above, for min) the previous pick, or a later duplicate of it.
+        writer << "if (input0" << buffer << operation_sign << "current_min_max)\n";
+        writer.block_begin();
+        {
+            writer << "if (input0" << buffer << " " << prev_operation_sign
+                   << " prev_min_max || (input0" << buffer
+                   << " == prev_min_max && j > prev_index))\n";
+            writer.block_begin();
+            {
+                writer << "current_min_max = input0" << buffer << ";\n";
+                writer << "current_index = j;\n";
+            }
+            writer.block_end();
+        }
+        writer.block_end();
+        writer.block_end();
+
+        // Build the output access expression, e.g. [i0][i][i2] for top_k_axis 1.
+        current = 0;
+        string outbuffer;
+        for (size_t j = 0; j < shape_size; ++j)
+        {
+            if (j == top_k_axis)
+            {
+                outbuffer += "[i]";
+            }
+            else
+            {
+                outbuffer += "[i" + to_string(current) + "]";
+            }
+            ++current;
+        }
+
+        if (find_indices)
+        {
+            writer << "output" << outbuffer << " = current_index;\n";
+        }
+        else
+        {
+            writer << "output" << outbuffer << " = current_min_max;\n";
+        }
+        writer << "prev_min_max = current_min_max;\n";
+        writer << "prev_index = current_index;\n";
+        writer.block_end();
+
+        // Close the loops emitted for the non-axis dimensions.
+        current_output = 0;
+        for (auto const& i : output_shape)
+        {
+            if (current_output != top_k_axis)
+            {
+                writer.block_end();
+            }
+            ++current_output;
+        }
+    }
+    writer.block_end();
+
+    const cldnn::custom_gpu_primitive op_topk(output_name,
+                                              {input_name},
+                                              {writer.get_code()},
+                                              entry_point_name,
+                                              get_kernel_args(1, 1),
+                                              "",
+                                              layout,
+                                              {1});
+    topology.add(op_topk);
+}
+
 size_t runtime::intelgpu::get_max_memory_rss()
 {
     size_t result = 0;
......
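For reference, a minimal sketch of the OpenCL C that do_topk_operation emits for the values pass (find_indices = false) with input_shape {2, 3}, top_k_axis = 1, k = 2, compute_max = true, and f32/i32 types. The exact kernel signature produced by gen_func_def and the FLT_MAX initializers returned by get_opencl_type_min_max_value are assumptions here:

    __kernel void topk_example(const __global float input0[2][3],
                               __global float output[2][2])
    {
        float prev_min_max = FLT_MAX; // previous pick; starts above all inputs
        int prev_index = -2;
        float current_min_max = -FLT_MAX;
        int current_index = -1;
        for (uint i0 = 0; i0 < 2; ++i0) // one loop per non-axis dimension
        {
            prev_min_max = FLT_MAX;
            prev_index = -2;
            for (uint i = 0; i < 2; ++i) // select k = 2 elements, in order
            {
                current_min_max = -FLT_MAX;
                current_index = -1;
                for (uint j = 0; j < 3; ++j) // scan the whole top-k axis
                {
                    // Accept a candidate only if it beats the current best and
                    // is strictly below the previous pick, or equals it at a
                    // later index (stable handling of duplicates).
                    if (input0[i0][j] > current_min_max)
                    {
                        if (input0[i0][j] < prev_min_max ||
                            (input0[i0][j] == prev_min_max && j > prev_index))
                        {
                            current_min_max = input0[i0][j];
                            current_index = j;
                        }
                    }
                }
                output[i0][i] = current_min_max;
                prev_min_max = current_min_max;
                prev_index = current_index;
            }
        }
    }

Each selected position costs a full scan of the axis, so one output slice takes O(k * n) comparisons: a simple but functional baseline for the custom-kernel path.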
@@ -266,6 +266,19 @@ namespace ngraph
                                       const element::Type& output_type,
                                       const AxisSet& axis);
 
+            void do_topk_operation(cldnn::topology& topology,
+                                   const std::string& input_name,
+                                   const Shape& input_shape,
+                                   const element::Type& input_type,
+                                   const std::string& output_name,
+                                   const Shape& output_shape,
+                                   const element::Type& output_type,
+                                   const element::Type& index_elem_type,
+                                   const size_t top_k_axis,
+                                   const size_t k,
+                                   const bool compute_max,
+                                   const bool find_indices);
+
             // Helper functions used in cldnn::custom_gpu_primitive kernels
             std::string get_opencl_type_name(const element::Type& ngraph_type);
             std::string get_opencl_type_min_max_value(const element::Type& ngraph_type,
......
@@ -22,30 +22,6 @@ shape_of_5d
 shape_of_matrix
 shape_of_scalar
 shape_of_vector
-topk_1d_max_all
-topk_1d_max_one
-topk_1d_max_partial
-topk_1d_min_all
-topk_1d_min_one
-topk_1d_min_partial
-topk_2d_max_all
-topk_2d_max_one
-topk_2d_max_one_with_equal_values
-topk_2d_max_partial
-topk_2d_min_all
-topk_2d_min_one
-topk_2d_min_partial
-topk_3d_large_input_max
-topk_3d_large_input_min
-topk_3d_max_all
-topk_3d_max_one
-topk_3d_max_partial
-topk_3d_min_all
-topk_3d_min_one
-topk_3d_min_partial
-topk_3d_single_output
-topk_5d_max_partial
-topk_int64
 floor_int32
 # Unsupported extra padding modes
......
@@ -45,6 +45,7 @@
 #include "ngraph/op/reshape.hpp"
 #include "ngraph/op/slice.hpp"
 #include "ngraph/op/sum.hpp"
+#include "ngraph/op/topk.hpp"
 #include "ngraph/util.hpp"
 
 using namespace ngraph;
@@ -366,6 +367,17 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
                                    conv_op_data->get_padding_below_forward());
         break;
     }
+    case OP_TYPEID::TopK:
+    {
+        const shared_ptr<op::TopK> topk_op = static_pointer_cast<op::TopK>(node);
+
+        writer << print_table_row_value("top_k_axis", topk_op->get_top_k_axis())
+               << print_table_row_value("index_element_type", topk_op->get_index_element_type())
+               << print_table_row_value("k", topk_op->get_k())
+               << print_table_row_value("compute_max", topk_op->get_compute_max());
+        break;
+    }
     case OP_TYPEID::UNDEFINED_OP:
     default:
     {
......