Commit 35a4a32f authored by Anna Alberska, committed by Robert Kimball

IntelGPU backend: ArgMax, ArgMin, LRN operations + Add Quantize op into IntelGPU (#1697)

* add quantize op into intelgpu

* IntelGPU backend: ArgMax, ArgMin, LRN operations

* PR1697. Comments addressed

* PR1697. Empty lines added
parent 7506133f
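
For context, a minimal sketch (not part of this commit) of the nGraph nodes the IntelGPU backend can now lower. The op constructor signatures and parameter order are assumptions inferred from the headers included and the accessors used below (get_reduction_axis, get_alpha, get_beta, get_bias, get_nsize), not copied from this change.

// Sketch only: example ArgMax/ArgMin/LRN nodes, assuming the op constructor
// signatures of the nGraph version this change targets.
#include <memory>
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

void build_example_nodes()
{
    // 2x3 f32 input; reduce along axis 1 and report indices as i32
    // (the i32/i64 index path goes through the custom OpenCL kernel added below).
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto arg_max = std::make_shared<op::ArgMax>(data, 1, element::i32);
    auto arg_min = std::make_shared<op::ArgMin>(data, 1, element::i32);

    // Across-channel LRN on an NCHW tensor: alpha, beta, bias, window size
    // (parameter order assumed; it mirrors the get_alpha/get_beta/get_bias/
    // get_nsize accessors used by the backend code in this commit).
    auto image = std::make_shared<op::Parameter>(element::f32, Shape{1, 4, 5, 5});
    auto lrn = std::make_shared<op::LRN>(image, 0.0001, 0.75, 1.0, 3);

    (void)arg_max;
    (void)arg_min;
    (void)lrn;
}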
@@ -16,6 +16,7 @@
#include <CPP/activation.hpp>
#include <CPP/activation_grad.hpp>
#include <CPP/arg_max_min.hpp>
#include <CPP/batch_norm.hpp>
#include <CPP/broadcast.hpp>
#include <CPP/concatenation.hpp>
@@ -24,6 +25,7 @@
#include <CPP/eltwise.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/layout.hpp>
#include <CPP/lrn.hpp>
#include <CPP/permute.hpp>
#include <CPP/pooling.hpp>
#include <CPP/reorder.hpp>
@@ -49,6 +51,8 @@
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
@@ -57,6 +61,7 @@
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/min.hpp"
@@ -1232,11 +1237,85 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
one_hot_axis);
break;
}
case OP_TYPEID::ArgMax:
{
arguments_check(op, 1, 1);
const shared_ptr<op::ArgMax> arg_max_op = static_pointer_cast<op::ArgMax>(op);
const size_t reduction_axis = arg_max_op->get_reduction_axis();
const element::Type& index_elem_type = arg_max_op->get_element_type();
if (index_elem_type == element::i64 || index_elem_type == element::i32)
{
do_arg_max_min_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
reduction_axis,
true);
}
else
{
cldnn::arg_max_min::axis_name axis =
reduction_axis == 0 ? cldnn::arg_max_min::y : cldnn::arg_max_min::x;
const cldnn::arg_max_min arg_max_min(
get_output_name(op), get_input_name(op), cldnn::arg_max_min::max, 1, axis);
topology.add(arg_max_min);
}
break;
}
case OP_TYPEID::ArgMin:
{
arguments_check(op, 1, 1);
const shared_ptr<op::ArgMin> arg_min_op = static_pointer_cast<op::ArgMin>(op);
const size_t reduction_axis = arg_min_op->get_reduction_axis();
const element::Type& index_elem_type = arg_min_op->get_element_type();
if (index_elem_type == element::i64 || index_elem_type == element::i32)
{
do_arg_max_min_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
reduction_axis,
false);
}
else
{
cldnn::arg_max_min::axis_name axis =
reduction_axis == 0 ? cldnn::arg_max_min::y : cldnn::arg_max_min::x;
const cldnn::arg_max_min arg_max_min(
get_output_name(op), get_input_name(op), cldnn::arg_max_min::min, 1, axis);
topology.add(arg_max_min);
}
break;
}
case OP_TYPEID::LRN:
{
arguments_check(op, 1, 1);
const shared_ptr<op::LRN> lrn_op = static_pointer_cast<op::LRN>(op);
const cldnn::lrn lrn(get_output_name(op),
get_input_name(op),
lrn_op->get_nsize(),
lrn_op->get_bias(),
lrn_op->get_alpha(),
lrn_op->get_beta(),
cldnn_lrn_norm_region_across_channel);
topology.add(lrn);
break;
}
case OP_TYPEID::AllReduce:
case OP_TYPEID::FunctionCall:
case OP_TYPEID::Quantize:
case OP_TYPEID::Reduce:
case OP_TYPEID::ReduceWindow:
case OP_TYPEID::ReplaceSlice:
@@ -1258,7 +1258,7 @@ void runtime::intelgpu::do_one_hot_operation(cldnn::topology& topology,
size_t current_input = 0;
string buffer;
const size_t output_shape_size = output_shape.size();
for (uint j = 0; j < output_shape_size; ++j)
{
if (j == one_hot_axis)
{
@@ -1439,3 +1439,84 @@ void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
gws);
topology.add(op_custom_eltwise);
}

void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const size_t reduction_axis,
const bool is_max)
{
const string operation_name = is_max ? "max" : "min";
const string entry_point_name = "op_arg_" + operation_name + "_" + output_name;
codegen::CodeWriter writer;
vector<size_t> gws;
const string operation_sign = is_max ? " > " : " < ";
const string infinity = is_max ? "-INFINITY" : "INFINITY";
const string var_name = operation_name + "_val";
size_t current_input = 0;
string dims_buffer;
const size_t input_shape_size = input_shape.size();
for (uint j = 0; j < input_shape_size; ++j)
{
if (j == reduction_axis)
{
dims_buffer += "[i]";
}
else
{
dims_buffer += "[i" + to_string(current_input) + "]";
++current_input;
}
}
gen_func_def(writer,
entry_point_name,
{get_opencl_type_name(input_type)},
{input_shape},
get_opencl_type_name(output_type),
output_shape);
writer.block_begin();
{
gws = generate_loops(writer, output_shape, true);
writer << get_opencl_type_name(output_type) << " " << var_name << " = " << infinity
<< ";\n";
writer << "uint index = -1;\n";
writer << "for (uint i = 0; i < " << input_shape.at(reduction_axis) << "; ++i)\n";
writer.block_begin();
{
writer << "if(i == 0 || input0" << dims_buffer << operation_sign << var_name << ")\n";
writer.block_begin();
{
writer << var_name << " = input0" << dims_buffer << ";\n";
writer << "index = i;\n";
}
writer.block_end();
}
writer.block_end();
writer << "output" << access_dims(output_shape) << " = index;\n";
generate_loops(writer, output_shape, false);
}
writer.block_end();
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_arg_max_min(output_name,
{input_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
gws);
topology.add(op_arg_max_min);
}
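
For illustration, below is roughly the OpenCL C that do_arg_max_min_operation would emit for an f32 input of shape {2, 3} reduced along axis 1 into an i32 output of shape {2}, with is_max = true. The kernel name, signature, and outer-loop form depend on gen_func_def and generate_loops, which are not part of this diff, so they are approximated; only the inner loop body mirrors the writer calls above, and the output name is a placeholder.

// Sketch only: approximate kernel text produced by the generator above for
// input_shape {2, 3}, reduction_axis = 1, is_max = true, i32 output.
// gen_func_def/generate_loops (not shown in this diff) determine the real
// signature and outer-loop form; the entry point and buffer names are placeholders.
const char* const example_argmax_kernel = R"(
__kernel void op_arg_max_output0(const __global float input0[2][3],
                                 __global int output[2])
{
    for (uint i0 = 0; i0 < 2; ++i0) // generate_loops may map this to work-items
    {
        int max_val = -INFINITY; // the generator declares this with the output element type
        uint index = -1;         // wraps to UINT_MAX until the first assignment below
        for (uint i = 0; i < 3; ++i)
        {
            if (i == 0 || input0[i0][i] > max_val)
            {
                max_val = input0[i0][i];
                index = i;
            }
        }
        output[i0] = index;
    }
}
)";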
@@ -165,6 +165,16 @@ namespace ngraph
const element::Type& output_type,
const CUSTOM_ELTWISE operation_name);
void do_arg_max_min_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const size_t reduction_axis,
const bool is_max);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::string get_opencl_type_name(const element::Type& ngraph_type);
std::vector<cldnn_arg> get_kernel_args(size_t input, size_t output);
argmax_trivial
argmin_trivial
avg_pool_2d_2channel_2image_padded_only_above
avg_pool_3d
backwards_batch_norm_three_outputs
@@ -19,10 +17,13 @@ batch_norm_three_outputs
divide_by_zero_int32
dot_matrix_vector_int64
function_call
lrn
max_pool_3d
numeric_double_inf
numeric_double_nan
quantize
quantize_axes
quantize_int8
quantize_clamp
reduce_3d_to_vector
reduce_matrix_cols_zero
reduce_matrix_columns