Commit a0ab82d8 authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: Quantize operations (#2465)

* IntelGPU backend: Quantize operations

* Update intelgpu_op_custom_kernels.cpp
parent 25d23a8d
......@@ -72,6 +72,7 @@
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/get_output_element.hpp"
......@@ -82,6 +83,7 @@
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/reverse.hpp"
#include "ngraph/op/slice.hpp"
......@@ -1775,6 +1777,52 @@ shared_ptr<runtime::Executable>
}
break;
}
case OP_TYPEID::Quantize:
{
arguments_check(op, 3, 1);
const shared_ptr<op::Quantize> quant_op = static_pointer_cast<op::Quantize>(op);
const AxisSet& axes = quant_op->get_axes();
const op::Quantize::RoundMode mode = quant_op->get_round_mode();
do_quantize_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_type(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_input_name(op, 2),
get_input_shape(op, 2),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes,
mode);
break;
}
case OP_TYPEID::Dequantize:
{
arguments_check(op, 3, 1);
const shared_ptr<op::Dequantize> dequ_op = static_pointer_cast<op::Dequantize>(op);
const AxisSet& axes = dequ_op->get_axes();
do_dequantize_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_type(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_input_type(op, 1),
get_input_name(op, 2),
get_input_shape(op, 2),
get_input_type(op, 2),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes);
break;
}
case OP_TYPEID::LRN:
{
arguments_check(op, 1, 1);
......@@ -1793,8 +1841,6 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::AllReduce:
case OP_TYPEID::BroadcastLike:
case OP_TYPEID::Dequantize:
case OP_TYPEID::Quantize:
case OP_TYPEID::QuantizedAvgPool:
case OP_TYPEID::QuantizedConvolutionBias:
case OP_TYPEID::QuantizedConvolutionBiasAdd:
......
......@@ -1480,6 +1480,191 @@ void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
topology.add(op_reshape);
}
void runtime::intelgpu::do_quantize_operation(cldnn::topology& topology,
                                              const string& input0_name,
                                              const Shape& input0_shape,
                                              const element::Type& input0_type,
                                              const string& input1_name,
                                              const Shape& input1_shape,
                                              const string& input2_name,
                                              const Shape& input2_shape,
                                              const string& output_name,
                                              const Shape& output_shape,
                                              const element::Type& output_type,
                                              const AxisSet& axis,
                                              const ngraph::op::Quantize::RoundMode mode)
{
    // Generates a custom OpenCL kernel implementing the nGraph Quantize op:
    //   output = convert<quant_type>(round(input0 / input1) + input2)
    // where input0 is the real-valued data, input1 the scale, and input2 the
    // zero-point offset. The rounding step is selected by 'mode'.
    //
    // NOTE(review): 'axis' is only echoed into a kernel comment here; the
    // element-wise broadcasting is carried entirely by the scale/offset shapes
    // through access_dims(). Confirm axes handling against the op definition.
    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    const string entry_point_name = "quantize_" + output_name;
    const string real_type_str = get_opencl_type_name(input0_type);
    const string quant_type_str = get_opencl_type_name(output_type);
    codegen::CodeWriter writer;
    vector<size_t> gws;

    gen_func_def(writer,
                 entry_point_name,
                 {real_type_str, real_type_str, quant_type_str},
                 {input0_shape, input1_shape, input2_shape},
                 quant_type_str,
                 output_shape);

    writer.block_begin();
    {
        // Debug header inside the generated kernel: record axes and mode.
        // (static_cast instead of C-style cast; the emitted text is unchanged.)
        writer << "// " << axis << "\n"
               << "// rounding mode: " << static_cast<int>(mode) << "\n";

        // Main loops
        gws = generate_loops(writer, input0_shape, true);

        // apply scale
        writer << real_type_str << " qvalue = input0" << access_dims(input0_shape) << " / input1"
               << access_dims(input1_shape) << ";\n";

        // round: emit the OpenCL snippet matching the requested rounding mode.
        // fabs/floor/ceil/fmod are OpenCL built-in math functions.
        switch (mode)
        {
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY:
        {
            // round half away from zero
            writer << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                   << real_type_str << " abs_qvalue_toward_inf = floor(abs_qvalue + 0.5);\n"
                   << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_inf : abs_qvalue_toward_inf;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_ZERO:
        {
            // round half toward zero
            writer
                << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                << real_type_str << " abs_qvalue_toward_zero = ceil(abs_qvalue - 0.5);\n"
                << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_zero : abs_qvalue_toward_zero;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_UPWARD:
        {
            // round half up
            writer << "qvalue = floor(qvalue + 0.5);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_DOWNWARD:
        {
            // round half down
            writer << "qvalue = ceil(qvalue - 0.5);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN:
        {
            // banker's rounding: of the two nearest integers, pick the even one
            writer << real_type_str << " up_qvalue = floor(qvalue + 0.5);\n"
                   << real_type_str << " dn_qvalue = ceil(qvalue - 0.5);\n"
                   << real_type_str << " rem = fmod(up_qvalue, convert_" << real_type_str
                   << "(2.0));\n"
                   << "qvalue = (rem == 0.0) ? up_qvalue : dn_qvalue;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_TOWARD_INFINITY:
        {
            // round away from zero
            writer << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                   << real_type_str << " abs_qvalue_toward_inf = ceil(abs_qvalue);\n"
                   << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_inf : abs_qvalue_toward_inf;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_TOWARD_ZERO:
        {
            // truncate toward zero
            writer
                << real_type_str << " abs_qvalue = fabs(qvalue);\n"
                << real_type_str << " abs_qvalue_toward_zero = floor(abs_qvalue);\n"
                << "qvalue = (qvalue < 0.0) ? -abs_qvalue_toward_zero : abs_qvalue_toward_zero;\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_UP:
        {
            // ceiling
            writer << "qvalue = ceil(qvalue);\n";
            break;
        }
        case ngraph::op::Quantize::RoundMode::ROUND_DOWN:
        {
            // floor
            writer << "qvalue = floor(qvalue);\n";
            break;
        }
        default:
        {
            throw ngraph_error("Unsupported rounding mode '" + to_string(static_cast<int>(mode)) +
                               "' in runtime::intelgpu::do_quantize_operation()");
        }
        }

        // apply offset
        writer << "qvalue += input2" << access_dims(input2_shape) << ";\n";

        // cast to output
        writer << "output" << access_dims(output_shape) << " = convert_" << quant_type_str
               << "(qvalue);\n";

        // Closing brackets for main loops
        generate_loops(writer, input0_shape, false);
    }
    writer.block_end();

    const cldnn::custom_gpu_primitive op_quantize(output_name,
                                                  {input0_name, input1_name, input2_name},
                                                  {writer.get_code()},
                                                  entry_point_name,
                                                  get_kernel_args(3, 1),
                                                  "",
                                                  layout,
                                                  gws);
    topology.add(op_quantize);
}
void runtime::intelgpu::do_dequantize_operation(cldnn::topology& topology,
                                                const std::string& input0_name,
                                                const Shape& input0_shape,
                                                const element::Type& input0_type,
                                                const std::string& input1_name,
                                                const Shape& input1_shape,
                                                const element::Type& input1_type,
                                                const std::string& input2_name,
                                                const Shape& input2_shape,
                                                const element::Type& input2_type,
                                                const string& output_name,
                                                const Shape& output_shape,
                                                const element::Type& output_type,
                                                const AxisSet& axis)
{
    // Generates a custom OpenCL kernel implementing the nGraph Dequantize op:
    //   output = (input0 - input2) * input1
    // with input0 the quantized data, input1 the scale and input2 the offset.
    // The axes are only recorded as a comment inside the generated kernel.
    const string kernel_name = "dequantize_" + output_name;
    const cldnn::layout out_layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    codegen::CodeWriter code;
    vector<size_t> global_ws;

    // Emit the kernel signature from the three input and one output descriptors.
    gen_func_def(code,
                 kernel_name,
                 {get_opencl_type_name(input0_type),
                  get_opencl_type_name(input1_type),
                  get_opencl_type_name(input2_type)},
                 {input0_shape, input1_shape, input2_shape},
                 get_opencl_type_name(output_type),
                 output_shape);

    code.block_begin();
    {
        code << "// " << axis << "\n";

        // Open the per-dimension loops and collect the global work size.
        global_ws = generate_loops(code, output_shape, true);

        // Body: subtract the offset, then multiply by the scale.
        code << "output" << access_dims(output_shape) << " = "
             << "(input0" << access_dims(input0_shape) << " - input2" << access_dims(input2_shape)
             << ") * input1" << access_dims(input1_shape) << ";\n";

        // Close the per-dimension loops.
        generate_loops(code, output_shape, false);
    }
    code.block_end();

    topology.add(cldnn::custom_gpu_primitive(output_name,
                                             {input0_name, input1_name, input2_name},
                                             {code.get_code()},
                                             kernel_name,
                                             get_kernel_args(3, 1),
                                             "",
                                             out_layout,
                                             global_ws));
}
size_t runtime::intelgpu::get_max_memory_rss()
{
size_t result = 0;
......
......@@ -23,6 +23,7 @@
#include "ngraph/axis_set.hpp"
#include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -191,6 +192,35 @@ namespace ngraph
const element::Type& output_type,
const AxisVector& reshape_axes);
// Adds a custom OpenCL kernel to 'topology' implementing nGraph Quantize:
// real input (input0) divided by scale (input1), rounded per 'mode', then
// shifted by offset (input2) and converted to the quantized output type.
// NOTE(review): in the current implementation 'axis' is only echoed into a
// kernel comment; broadcasting is driven by the scale/offset shapes.
void do_quantize_operation(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const std::string& input1_name,
const Shape& input1_shape,
const std::string& input2_name,
const Shape& input2_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const ngraph::op::Quantize::RoundMode mode);
// Adds a custom OpenCL kernel to 'topology' implementing nGraph Dequantize:
// output = (input0 - input2) * input1, where input0 is the quantized data,
// input1 the scale and input2 the zero-point offset.
// NOTE(review): 'axis' is only recorded in a kernel comment here as well.
void do_dequantize_operation(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const std::string& input1_name,
const Shape& input1_shape,
const element::Type& input1_type,
const std::string& input2_name,
const Shape& input2_shape,
const element::Type& input2_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::string get_opencl_type_name(const element::Type& ngraph_type);
std::string get_opencl_type_min_max_value(const element::Type& ngraph_type,
......
......@@ -13,40 +13,12 @@ backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
backwards_slice
batch_norm_bprop_n4c3h2w2
dequantize
dequantize_axes
dequantize_dynamic_offset
dequantize_int32
dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32
embedding_lookup_10x1_arbitrary
embedding_lookup_10x1_arbitrary_index_type_int
embedding_lookup_4x5_reverse
generate_mask
max_pool_3d
quantize
quantize_axes
quantize_clamp_int32
quantize_clamp_int8
quantize_clamp_uint8
quantize_dynamic_offset
quantize_int32
quantize_int32_zero_offset
quantize_int8
quantize_int8_zero_offset
quantize_ROUND_DOWN
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_zero_offset
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......
......@@ -31,6 +31,7 @@
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/lrn.hpp"
......@@ -40,6 +41,7 @@
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/sum.hpp"
......@@ -277,6 +279,21 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
<< print_table_row_value("transpose", op_reshape->get_is_transpose());
break;
}
case OP_TYPEID::Quantize:
{
const shared_ptr<op::Quantize> quant_op = static_pointer_cast<op::Quantize>(node);
writer << print_table_row_dims("axes", quant_op->get_axes())
<< print_table_row_value("rounding mode", (int)quant_op->get_round_mode());
break;
}
case OP_TYPEID::Dequantize:
{
const shared_ptr<op::Dequantize> quant_op = static_pointer_cast<op::Dequantize>(node);
writer << print_table_row_dims("axes", quant_op->get_axes());
break;
}
case OP_TYPEID::Concat:
{
const shared_ptr<op::Concat> concat_op = static_pointer_cast<op::Concat>(node);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment