Commit a0ab82d8 authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: Quantize operations (#2465)

* IntelGPU backend: Quantize operations

* Update intelgpu_op_custom_kernels.cpp
parent 25d23a8d
......@@ -72,6 +72,7 @@
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/get_output_element.hpp"
......@@ -82,6 +83,7 @@
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/reverse.hpp"
#include "ngraph/op/slice.hpp"
......@@ -1775,6 +1777,52 @@ shared_ptr<runtime::Executable>
}
break;
}
case OP_TYPEID::Quantize:
{
arguments_check(op, 3, 1);
const shared_ptr<op::Quantize> quant_op = static_pointer_cast<op::Quantize>(op);
const AxisSet& axes = quant_op->get_axes();
const op::Quantize::RoundMode mode = quant_op->get_round_mode();
do_quantize_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_type(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_input_name(op, 2),
get_input_shape(op, 2),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes,
mode);
break;
}
case OP_TYPEID::Dequantize:
{
arguments_check(op, 3, 1);
const shared_ptr<op::Dequantize> dequ_op = static_pointer_cast<op::Dequantize>(op);
const AxisSet& axes = dequ_op->get_axes();
do_dequantize_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_type(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_input_type(op, 1),
get_input_name(op, 2),
get_input_shape(op, 2),
get_input_type(op, 2),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes);
break;
}
case OP_TYPEID::LRN:
{
arguments_check(op, 1, 1);
......@@ -1793,8 +1841,6 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::AllReduce:
case OP_TYPEID::BroadcastLike:
case OP_TYPEID::Dequantize:
case OP_TYPEID::Quantize:
case OP_TYPEID::QuantizedAvgPool:
case OP_TYPEID::QuantizedConvolutionBias:
case OP_TYPEID::QuantizedConvolutionBiasAdd:
......
......@@ -1480,6 +1480,191 @@ void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
topology.add(op_reshape);
}
void runtime::intelgpu::do_quantize_operation(cldnn::topology& topology,
                                              const string& input0_name,
                                              const Shape& input0_shape,
                                              const element::Type& input0_type,
                                              const string& input1_name,
                                              const Shape& input1_shape,
                                              const string& input2_name,
                                              const Shape& input2_shape,
                                              const string& output_name,
                                              const Shape& output_shape,
                                              const element::Type& output_type,
                                              const AxisSet& axis,
                                              const ngraph::op::Quantize::RoundMode mode)
{
    // Generates a custom OpenCL kernel implementing the nGraph Quantize op:
    //   output = convert<quant_type>(round(input0 / input1) + input2)
    // where input0 is the real-valued data, input1 the scale, and input2 the
    // zero-point offset. The rounding step is selected by 'mode'.
    //
    // NOTE(review): 'axis' is only echoed into a kernel comment here; the
    // element-wise broadcasting is carried entirely by the scale/offset shapes
    // through access_dims(). Confirm axes handling against the op definition.
    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    const string entry_point_name = "quantize_" + output_name;
    const string real_type_str = get_opencl_type_name(input0_type);
    const string quant_type_str = get_opencl_type_name(output_type);
    codegen::CodeWriter writer;
    vector<size_t> gws;

    gen_func_def(writer,
                 entry_point_name,
                 {real_type_str, real_type_str, quant_type_str},
                 {input0_shape, input1_shape, input2_shape},
                 quant_type_str,
                 output_shape);

    writer.block_begin();
    {
        // Debug header inside the generated kernel: record axes and mode.
        // (static_cast instead of C-style cast; the emitted text is unchanged.)
        writer << "// " << axis << "\n"
               << "// rounding mode: " << static_cast<int>(mode) << "\n";

        // Main loops
        gws = generate_loops(writer, input0_shape, true);

        // apply scale
        writer << real_type_str << " qvalue = input0" << access_dims(input0_shape) << " / input1"
               << access_dims(input1_shape) << ";\n";

        // round: emit the OpenCL snippet matching the requested rounding mode.
        // fabs/floor/ceil/fmod are OpenCL built-in math functions.
        switch (mode)
        {
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY:
        {
            // round half away from zero
            writer << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                   << real_type_str << " abs_qvalue_toward_inf = floor(abs_qvalue + 0.5);\n"
                   << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_inf : abs_qvalue_toward_inf;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_ZERO:
        {
            // round half toward zero
            writer
                << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                << real_type_str << " abs_qvalue_toward_zero = ceil(abs_qvalue - 0.5);\n"
                << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_zero : abs_qvalue_toward_zero;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_UPWARD:
        {
            // round half up
            writer << "qvalue = floor(qvalue + 0.5);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_DOWNWARD:
        {
            // round half down
            writer << "qvalue = ceil(qvalue - 0.5);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN:
        {
            // banker's rounding: of the two nearest integers, pick the even one
            writer << real_type_str << " up_qvalue = floor(qvalue + 0.5);\n"
                   << real_type_str << " dn_qvalue = ceil(qvalue - 0.5);\n"
                   << real_type_str << " rem = fmod(up_qvalue, convert_" << real_type_str
                   << "(2.0));\n"
                   << "qvalue = (rem == 0.0) ? up_qvalue : dn_qvalue;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_TOWARD_INFINITY:
        {
            // round away from zero
            writer << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                   << real_type_str << " abs_qvalue_toward_inf = ceil(abs_qvalue);\n"
                   << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_inf : abs_qvalue_toward_inf;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_TOWARD_ZERO:
        {
            // truncate toward zero
            writer
                << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                << real_type_str << " abs_qvalue_toward_zero = floor(abs_qvalue);\n"
                << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_zero : abs_qvalue_toward_zero;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_UP:
        {
            // ceiling
            writer << "qvalue = ceil(qvalue);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_DOWN:
        {
            // floor
            writer << "qvalue = floor(qvalue);\n";
            break;
        }
        default:
        {
            throw ngraph_error("Unsupported rounding mode '" + to_string(static_cast<int>(mode)) +
                               "' in runtime::intelgpu::do_quantize_operation()");
        }
        }

        // apply offset
        writer << "qvalue += input2" << access_dims(input2_shape) << ";\n";

        // cast to output
        writer << "output" << access_dims(output_shape) << " = convert_" << quant_type_str
               << "(qvalue);\n";

        // Closing brackets for main loops
        generate_loops(writer, input0_shape, false);
    }
    writer.block_end();

    const cldnn::custom_gpu_primitive op_quantize(output_name,
                                                  {input0_name, input1_name, input2_name},
                                                  {writer.get_code()},
                                                  entry_point_name,
                                                  get_kernel_args(3, 1),
                                                  "",
                                                  layout,
                                                  gws);
    topology.add(op_quantize);
}
void runtime::intelgpu::do_dequantize_operation(cldnn::topology& topology,
                                                const std::string& input0_name,
                                                const Shape& input0_shape,
                                                const element::Type& input0_type,
                                                const std::string& input1_name,
                                                const Shape& input1_shape,
                                                const element::Type& input1_type,
                                                const std::string& input2_name,
                                                const Shape& input2_shape,
                                                const element::Type& input2_type,
                                                const string& output_name,
                                                const Shape& output_shape,
                                                const element::Type& output_type,
                                                const AxisSet& axis)
{
    // Generates a custom OpenCL kernel implementing the nGraph Dequantize op:
    //   output = (input0 - input2) * input1
    // with input0 the quantized data, input1 the scale and input2 the offset.
    // The axes are only recorded as a comment inside the generated kernel.
    const string kernel_name = "dequantize_" + output_name;
    const cldnn::layout out_layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    codegen::CodeWriter code;
    vector<size_t> global_ws;

    // Emit the kernel signature from the three input and one output descriptors.
    gen_func_def(code,
                 kernel_name,
                 {get_opencl_type_name(input0_type),
                  get_opencl_type_name(input1_type),
                  get_opencl_type_name(input2_type)},
                 {input0_shape, input1_shape, input2_shape},
                 get_opencl_type_name(output_type),
                 output_shape);

    code.block_begin();
    {
        code << "// " << axis << "\n";

        // Open the per-dimension loops and collect the global work size.
        global_ws = generate_loops(code, output_shape, true);

        // Body: subtract the offset, then multiply by the scale.
        code << "output" << access_dims(output_shape) << " = "
             << "(input0" << access_dims(input0_shape) << " - input2" << access_dims(input2_shape)
             << ") * input1" << access_dims(input1_shape) << ";\n";

        // Close the per-dimension loops.
        generate_loops(code, output_shape, false);
    }
    code.block_end();

    topology.add(cldnn::custom_gpu_primitive(output_name,
                                             {input0_name, input1_name, input2_name},
                                             {code.get_code()},
                                             kernel_name,
                                             get_kernel_args(3, 1),
                                             "",
                                             out_layout,
                                             global_ws));
}
size_t runtime::intelgpu::get_max_memory_rss()
{
size_t result = 0;
......
......@@ -23,6 +23,7 @@
#include "ngraph/axis_set.hpp"
#include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -191,6 +192,35 @@ namespace ngraph
const element::Type& output_type,
const AxisVector& reshape_axes);
// Adds a custom OpenCL kernel to 'topology' implementing nGraph Quantize:
// real input (input0) divided by scale (input1), rounded per 'mode', then
// shifted by offset (input2) and converted to the quantized output type.
// NOTE(review): in the current implementation 'axis' is only echoed into a
// kernel comment; broadcasting is driven by the scale/offset shapes.
void do_quantize_operation(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const std::string& input1_name,
const Shape& input1_shape,
const std::string& input2_name,
const Shape& input2_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const ngraph::op::Quantize::RoundMode mode);
// Adds a custom OpenCL kernel to 'topology' implementing nGraph Dequantize:
// output = (input0 - input2) * input1, where input0 is the quantized data,
// input1 the scale and input2 the zero-point offset.
// NOTE(review): 'axis' is only recorded in a kernel comment here as well.
void do_dequantize_operation(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const std::string& input1_name,
const Shape& input1_shape,
const element::Type& input1_type,
const std::string& input2_name,
const Shape& input2_shape,
const element::Type& input2_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::string get_opencl_type_name(const element::Type& ngraph_type);
std::string get_opencl_type_min_max_value(const element::Type& ngraph_type,
......
......@@ -13,40 +13,12 @@ backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
backwards_slice
batch_norm_bprop_n4c3h2w2
dequantize
dequantize_axes
dequantize_dynamic_offset
dequantize_int32
dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32
embedding_lookup_10x1_arbitrary
embedding_lookup_10x1_arbitrary_index_type_int
embedding_lookup_4x5_reverse
generate_mask
max_pool_3d
quantize
quantize_axes
quantize_clamp_int32
quantize_clamp_int8
quantize_clamp_uint8
quantize_dynamic_offset
quantize_int32
quantize_int32_zero_offset
quantize_int8
quantize_int8_zero_offset
quantize_ROUND_DOWN
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_zero_offset
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......
......@@ -31,6 +31,7 @@
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
......@@ -40,6 +41,7 @@
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/sum.hpp"
......@@ -277,6 +279,21 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
<< print_table_row_value("transpose", op_reshape->get_is_transpose());
break;
}
case OP_TYPEID::Quantize:
{
const shared_ptr<op::Quantize> quant_op = static_pointer_cast<op::Quantize>(node);
writer << print_table_row_dims("axes", quant_op->get_axes())
<< print_table_row_value("rounding mode", (int)quant_op->get_round_mode());
break;
}
case OP_TYPEID::Dequantize:
{
const shared_ptr<op::Dequantize> quant_op = static_pointer_cast<op::Dequantize>(node);
writer << print_table_row_dims("axes", quant_op->get_axes());
break;
}
case OP_TYPEID::Concat:
{
const shared_ptr<op::Concat> concat_op = static_pointer_cast<op::Concat>(node);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment