Commit b86eca4d authored by Anna Alberska, committed by Robert Kimball

IntelGPU backend: TopK operation (#2736)

* add top_k operation

* modify topk definition

* Update intelgpu_backend.cpp

* Update intelgpu_op_custom_kernels.cpp

* add topk to graph visualization

* enable index_element_type in graph visualization

* minor changes

parent 5d74b489
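
For context: nGraph's op::TopK carries the four attributes the backend reads below (top_k_axis, index_element_type, k, compute_max) and has two outputs, indices and values. A minimal usage sketch, assuming the v0 constructor takes the arguments in this order:

    // Hypothetical example: pick the 2 largest values along axis 1 of a
    // 2x3 float tensor; indices are reported as i32.
    auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto topk = make_shared<op::TopK>(data, 1, element::i32, 2, true);
    // Output 0 carries the indices, output 1 the values, which is why the
    // backend change below generates one kernel per output.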
@@ -92,6 +92,7 @@
 #include "ngraph/op/slice.hpp"
 #include "ngraph/op/softmax.hpp"
 #include "ngraph/op/sum.hpp"
+#include "ngraph/op/topk.hpp"
 #include "ngraph/parameter_vector.hpp"
 #include "ngraph/util.hpp"
@@ -2021,6 +2022,44 @@ shared_ptr<runtime::Executable>
             topology.add(lrn);
             break;
         }
+        case OP_TYPEID::TopK:
+        {
+            arguments_check(op, 1, 2);
+
+            const shared_ptr<op::TopK> topk_op = static_pointer_cast<op::TopK>(op);
+            const size_t top_k_axis = topk_op->get_top_k_axis();
+            const element::Type& index_elem_type = topk_op->get_index_element_type();
+            const size_t k = topk_op->get_k();
+            const bool compute_max = topk_op->get_compute_max();
+
+            // Output 0 receives the indices of the selected elements.
+            do_topk_operation(topology,
+                              op->get_input_tensor_name(0),
+                              op->get_input_shape(0),
+                              op->get_input_element_type(0),
+                              op->get_output_tensor_name(0),
+                              op->get_output_shape(0),
+                              op->get_output_element_type(0),
+                              index_elem_type,
+                              top_k_axis,
+                              k,
+                              compute_max,
+                              true);
+
+            // Output 1 receives the selected values themselves.
+            do_topk_operation(topology,
+                              op->get_input_tensor_name(0),
+                              op->get_input_shape(0),
+                              op->get_input_element_type(0),
+                              op->get_output_tensor_name(1),
+                              op->get_output_shape(1),
+                              op->get_output_element_type(1),
+                              index_elem_type,
+                              top_k_axis,
+                              k,
+                              compute_max,
+                              false);
+            break;
+        }
         case OP_TYPEID::AllReduce:
         case OP_TYPEID::BroadcastDistributed:
         case OP_TYPEID::BroadcastLike:
@@ -2041,7 +2080,6 @@ shared_ptr<runtime::Executable>
         case OP_TYPEID::ScalarConstantLike:
         case OP_TYPEID::ShapeOf:
         case OP_TYPEID::StopGradient:
-        case OP_TYPEID::TopK:
         case OP_TYPEID::Transpose:
         case OP_TYPEID::EmbeddingLookup:
         case OP_TYPEID::DynBroadcast:
......
@@ -2168,6 +2168,152 @@ void runtime::intelgpu::do_dequantize_operation(cldnn::topology& topology,
     topology.add(op_dequantize);
 }
 
+void runtime::intelgpu::do_topk_operation(cldnn::topology& topology,
+                                          const std::string& input_name,
+                                          const Shape& input_shape,
+                                          const element::Type& input_type,
+                                          const std::string& output_name,
+                                          const Shape& output_shape,
+                                          const element::Type& output_type,
+                                          const element::Type& index_elem_type,
+                                          const size_t top_k_axis,
+                                          const size_t k,
+                                          const bool compute_max,
+                                          const bool find_indices)
+{
+    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string entry_point_name = "topk_" + output_name;
+    CodeWriter writer;
+    const string operation_sign = compute_max ? " > " : " < ";
+    const string prev_operation_sign = !compute_max ? ">" : "<";
+    const size_t shape_size = input_shape.size();
+
+    gen_func_def(writer,
+                 entry_point_name,
+                 {get_opencl_type_name(input_type)},
+                 {input_shape},
+                 get_opencl_type_name(output_type),
+                 output_shape);
+
+    writer.block_begin();
+    {
+        // State tracked across the k selection passes: the previously selected
+        // value/index and the best candidate of the current pass.
+        writer << get_opencl_type_name(input_type)
+               << " prev_min_max = " << get_opencl_type_min_max_value(input_type, !compute_max)
+               << ";\n";
+        writer << get_opencl_type_name(index_elem_type) << " prev_index = -2;\n";
+        writer << get_opencl_type_name(input_type)
+               << " current_min_max = " << get_opencl_type_min_max_value(input_type, compute_max)
+               << ";\n";
+        writer << get_opencl_type_name(index_elem_type) << " current_index = -1;\n";
+
+        // Emit one loop per output dimension except the top-k axis.
+        size_t current_output = 0;
+        for (auto const& i : output_shape)
+        {
+            if (current_output != top_k_axis)
+            {
+                writer << "for (uint i" << current_output << " = 0; i" << current_output << " < "
+                       << i << "; ++i" << current_output << ")\n";
+                writer.block_begin();
+            }
+            ++current_output;
+        }
+
+        writer << "prev_min_max = " << get_opencl_type_min_max_value(input_type, !compute_max)
+               << ";\n";
+        writer << "prev_index = -2;\n";
+
+        // Select the k extreme elements one at a time.
+        writer << "for (uint i = 0; i < " << output_shape.at(top_k_axis) << "; ++i)\n";
+        writer.block_begin();
+        writer << "current_min_max = " << get_opencl_type_min_max_value(input_type, compute_max)
+               << ";\n";
+        writer << "current_index = -1;\n";
+        writer << "for (uint j = 0; j < " << input_shape.at(top_k_axis) << "; ++j)\n";
+        writer.block_begin();
+
+        // Build the input access expression, e.g. [i0][j][i2] for top_k_axis 1.
+        size_t current = 0;
+        string buffer;
+        for (size_t j = 0; j < shape_size; ++j)
+        {
+            if (j == top_k_axis)
+            {
+                buffer += "[j]";
+            }
+            else
+            {
+                buffer += "[i" + to_string(current) + "]";
+            }
+            ++current;
+        }
+
+        // A candidate must beat the current best while being strictly below
+        // (above, for min) the previous pick, or a later duplicate of it.
+        writer << "if (input0" << buffer << operation_sign << "current_min_max)\n";
+        writer.block_begin();
+        {
+            writer << "if (input0" << buffer << " " << prev_operation_sign
+                   << " prev_min_max || (input0" << buffer
+                   << " == prev_min_max && j > prev_index))\n";
+            writer.block_begin();
+            {
+                writer << "current_min_max = input0" << buffer << ";\n";
+                writer << "current_index = j;\n";
+            }
+            writer.block_end();
+        }
+        writer.block_end();
+        writer.block_end();
+
+        // Build the output access expression, e.g. [i0][i][i2] for top_k_axis 1.
+        current = 0;
+        string outbuffer;
+        for (size_t j = 0; j < shape_size; ++j)
+        {
+            if (j == top_k_axis)
+            {
+                outbuffer += "[i]";
+            }
+            else
+            {
+                outbuffer += "[i" + to_string(current) + "]";
+            }
+            ++current;
+        }
+
+        if (find_indices)
+        {
+            writer << "output" << outbuffer << " = current_index;\n";
+        }
+        else
+        {
+            writer << "output" << outbuffer << " = current_min_max;\n";
+        }
+        writer << "prev_min_max = current_min_max;\n";
+        writer << "prev_index = current_index;\n";
+        writer.block_end();
+
+        // Close the loops emitted for the non-axis dimensions.
+        current_output = 0;
+        for (auto const& i : output_shape)
+        {
+            if (current_output != top_k_axis)
+            {
+                writer.block_end();
+            }
+            ++current_output;
+        }
+    }
+    writer.block_end();
+
+    const cldnn::custom_gpu_primitive op_topk(output_name,
+                                              {input_name},
+                                              {writer.get_code()},
+                                              entry_point_name,
+                                              get_kernel_args(1, 1),
+                                              "",
+                                              layout,
+                                              {1});
+    topology.add(op_topk);
+}
+
 size_t runtime::intelgpu::get_max_memory_rss()
 {
     size_t result = 0;
......
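For reference, a minimal sketch of the OpenCL C that do_topk_operation emits for the values pass (find_indices = false) with input_shape {2, 3}, top_k_axis = 1, k = 2, compute_max = true, and f32/i32 types. The exact kernel signature produced by gen_func_def and the FLT_MAX initializers returned by get_opencl_type_min_max_value are assumptions here:

    __kernel void topk_example(const __global float input0[2][3],
                               __global float output[2][2])
    {
        float prev_min_max = FLT_MAX; // previous pick; starts above all inputs
        int prev_index = -2;
        float current_min_max = -FLT_MAX;
        int current_index = -1;
        for (uint i0 = 0; i0 < 2; ++i0) // one loop per non-axis dimension
        {
            prev_min_max = FLT_MAX;
            prev_index = -2;
            for (uint i = 0; i < 2; ++i) // select k = 2 elements, in order
            {
                current_min_max = -FLT_MAX;
                current_index = -1;
                for (uint j = 0; j < 3; ++j) // scan the whole top-k axis
                {
                    // Accept a candidate only if it beats the current best and
                    // is strictly below the previous pick, or equals it at a
                    // later index (stable handling of duplicates).
                    if (input0[i0][j] > current_min_max)
                    {
                        if (input0[i0][j] < prev_min_max ||
                            (input0[i0][j] == prev_min_max && j > prev_index))
                        {
                            current_min_max = input0[i0][j];
                            current_index = j;
                        }
                    }
                }
                output[i0][i] = current_min_max;
                prev_min_max = current_min_max;
                prev_index = current_index;
            }
        }
    }

Each selected position costs a full scan of the axis, so one output slice takes O(k * n) comparisons: a simple but functional baseline for the custom-kernel path.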
@@ -266,6 +266,19 @@ namespace ngraph
                                       const element::Type& output_type,
                                       const AxisSet& axis);
 
+            void do_topk_operation(cldnn::topology& topology,
+                                   const std::string& input_name,
+                                   const Shape& input_shape,
+                                   const element::Type& input_type,
+                                   const std::string& output_name,
+                                   const Shape& output_shape,
+                                   const element::Type& output_type,
+                                   const element::Type& index_elem_type,
+                                   const size_t top_k_axis,
+                                   const size_t k,
+                                   const bool compute_max,
+                                   const bool find_indices);
+
             // Helper functions used in cldnn::custom_gpu_primitive kernels
             std::string get_opencl_type_name(const element::Type& ngraph_type);
             std::string get_opencl_type_min_max_value(const element::Type& ngraph_type,
......
@@ -22,30 +22,6 @@ shape_of_5d
 shape_of_matrix
 shape_of_scalar
 shape_of_vector
-topk_1d_max_all
-topk_1d_max_one
-topk_1d_max_partial
-topk_1d_min_all
-topk_1d_min_one
-topk_1d_min_partial
-topk_2d_max_all
-topk_2d_max_one
-topk_2d_max_one_with_equal_values
-topk_2d_max_partial
-topk_2d_min_all
-topk_2d_min_one
-topk_2d_min_partial
-topk_3d_large_input_max
-topk_3d_large_input_min
-topk_3d_max_all
-topk_3d_max_one
-topk_3d_max_partial
-topk_3d_min_all
-topk_3d_min_one
-topk_3d_min_partial
-topk_3d_single_output
-topk_5d_max_partial
-topk_int64
 floor_int32
 # Unsupported extra padding modes
......
@@ -45,6 +45,7 @@
 #include "ngraph/op/reshape.hpp"
 #include "ngraph/op/slice.hpp"
 #include "ngraph/op/sum.hpp"
+#include "ngraph/op/topk.hpp"
 #include "ngraph/util.hpp"
 
 using namespace ngraph;
@@ -366,6 +367,17 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
                                    conv_op_data->get_padding_below_forward());
         break;
     }
+    case OP_TYPEID::TopK:
+    {
+        const shared_ptr<op::TopK> topk_op = static_pointer_cast<op::TopK>(node);
+
+        writer << print_table_row_value("top_k_axis", topk_op->get_top_k_axis())
+               << print_table_row_value("index_element_type", topk_op->get_index_element_type())
+               << print_table_row_value("k", topk_op->get_k())
+               << print_table_row_value("compute_max", topk_op->get_compute_max());
+        break;
+    }
     case OP_TYPEID::UNDEFINED_OP:
     default:
     {
......