Commit a9686f94 authored by Adam Straw, committed by Robert Kimball

adding i32 as quantized type (#2050)

* adding i32 as quantized type

* code format

* mask gpu unit tests

* unused variable

* intel gpu unit test manifest

* fix typo in unit test manifest
parent 2ebacf5e
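For orientation, here is a minimal sketch of what the new i32 quantized type allows at the graph level: a Quantize node mapping f32 values to i32 with a scalar scale and offset, mirroring the quantize_int32 test added further down. It is a hedged sketch, not code from this commit; the ngraph/ngraph.hpp umbrella include, the copy_data/read_vector helpers (repository test utilities), and the "INTERPRETER" backend name are assumptions about the surrounding repository.

#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp"      // umbrella header (assumed)
#include "util/test_tools.hpp"    // copy_data / read_vector test helpers (assumed path)

using namespace ngraph;

int main()
{
    Shape input_shape{4, 3};
    Shape scale_offset_shape;   // empty shape: scalar scale and offset
    AxisSet quantization_axes;  // empty: one scale/offset for the whole tensor
    auto round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;

    // f32 -> i32 quantization: y = round(x / scale) + offset, clamped to i32
    auto X = std::make_shared<op::Parameter>(element::f32, input_shape);
    auto scale = op::Constant::create(element::f32, scale_offset_shape, {2});
    auto offset = op::Constant::create(element::i32, scale_offset_shape, {1});
    auto quantize = std::make_shared<op::Quantize>(
        X, scale, offset, element::i32, quantization_axes, round_mode);
    auto f = std::make_shared<Function>(quantize, op::ParameterVector{X});

    auto backend = runtime::Backend::create("INTERPRETER");
    auto x = backend->create_tensor(element::f32, input_shape);
    auto y = backend->create_tensor(element::i32, input_shape);
    copy_data(x, std::vector<float>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
    backend->call_with_validate(f, {y}, {x});
    // y == {1, 1, 2, -1, 3, -1, 4, -3, 5, -3, 6, -5}, as in the quantize_int32 test
    auto result = read_vector<int32_t>(y);
    return 0;
}

The Dequantize direction is symmetric: an i32 input tensor with an i32 offset constant and an f32 scale produces f32 output, as exercised by the dequantize_int32 tests below.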
@@ -142,6 +142,41 @@ namespace ngraph
throw ngraph_error("Unsupported dequantization element type");
}
}
else if (args[0].get_element_type() == element::i32)
{
if (out[0].get_element_type() == element::f32)
{
functor = [&, arg0_shape, arg1_shape, daxes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
ngraph::runtime::reference::dequantize<int32_t>(
static_cast<int32_t*>(arg0_tensor),
static_cast<float*>(arg1_tensor),
static_cast<int32_t*>(arg2_tensor),
static_cast<float*>(out_tensor),
arg0_shape,
arg1_shape,
daxes);
};
}
else if (out[0].get_element_type() == element::f64)
{
functor = [&, arg0_shape, arg1_shape, daxes](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
ngraph::runtime::reference::dequantize<int32_t>(
static_cast<int32_t*>(arg0_tensor),
static_cast<double*>(arg1_tensor),
static_cast<int32_t*>(arg2_tensor),
static_cast<double*>(out_tensor),
arg0_shape,
arg1_shape,
daxes);
};
}
else
{
throw ngraph_error("Unsupported dequantization element type");
}
}
else
{
throw ngraph_error("Unsupported input element type");
@@ -235,6 +270,21 @@ namespace ngraph
round_mode);
};
}
else if (out[0].get_element_type() == element::i32)
{
functor = [&, arg0_shape, arg1_shape, daxes, round_mode](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
ngraph::runtime::reference::quantize<float>(
static_cast<float*>(arg0_tensor),
static_cast<float*>(arg1_tensor),
static_cast<int32_t*>(arg2_tensor),
static_cast<int32_t*>(out_tensor),
arg0_shape,
arg1_shape,
daxes,
round_mode);
};
}
else
{
throw ngraph_error("Unsupported quantization element type");
@@ -272,6 +322,21 @@ namespace ngraph
round_mode);
};
}
else if (out[0].get_element_type() == element::i32)
{
functor = [&, arg0_shape, arg1_shape, daxes, round_mode](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
ngraph::runtime::reference::quantize<double>(
static_cast<double*>(arg0_tensor),
static_cast<double*>(arg1_tensor),
static_cast<int32_t*>(arg2_tensor),
static_cast<int32_t*>(out_tensor),
arg0_shape,
arg1_shape,
daxes,
round_mode);
};
}
else
{
throw ngraph_error("Unsupported quantization element type");
...
@@ -785,6 +785,12 @@ namespace ngraph
if (offset[0] != 0)
return;
}
if (node->get_input_element_type(0) == element::i32)
{
auto offset = offset_const_op->get_vector<int32_t>();
if (offset[0] != 0)
return;
}
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
@@ -818,6 +824,14 @@ namespace ngraph
return;
}
}
if (node->get_output_element_type(0) == element::i32)
{
auto offset = offset_const_op->get_vector<int32_t>();
if (offset[0] != 0)
{
return;
}
}
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
...
@@ -16,6 +16,7 @@ shape_of_scalar
shape_of_vector
shape_of_matrix
shape_of_5d
quantize_clamp_int32
# this one just started failing
batchnorm_bprop_n4c3h2w2
@@ -30,15 +30,25 @@ backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_avgpool_n1_c1_hw2x2
backwards_avgpool_n1_c1_hw4x4
backwards_avgpool_n2_c2_hw4x4
dequantize
dequantize_zero_offset
dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32
dequantize_int32_zero_offset
quantize
quantize_zero_offset
quantize_axes
quantize_int8
quantize_int8_zero_offset
quantize_int32
quantize_int32_zero_offset
quantize_clamp_uint8
quantize_clamp_int8
quantize_clamp_int32
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
...
@@ -18,9 +18,12 @@ backwards_tanh
batch_norm_one_output
batch_norm_three_outputs
dequantize
dequantize_zero_offset
dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32
dequantize_int32_zero_offset
divide_by_zero_int32
dot_3d_multi_axis
dot_4d_5d_multi_axis
@@ -31,17 +34,24 @@ max_pool_3d
numeric_double_inf
numeric_double_nan
quantize
quantize_zero_offset
quantize_axes
quantize_int8
quantize_int8_zero_offset
quantize_int32
quantize_int32_zero_offset
quantize_clamp_uint8
quantize_clamp_int8
quantize_clamp_int32
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_ROUND_DOWN
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
...
@@ -971,6 +971,17 @@ private:
quantize->get_axes(),
quantize->get_round_mode());
}
else if (type == element::i32)
{
reference::quantize<T>(static_cast<const T*>(args[0]),
static_cast<const T*>(args[1]),
static_cast<const int32_t*>(args[2]),
static_cast<int32_t*>(out[0]),
node.get_input_shape(0),
node.get_input_shape(1),
quantize->get_axes(),
quantize->get_round_mode());
}
else
{
std::stringstream ss;
...
@@ -4905,7 +4905,7 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize)
typedef float input_c_type;
typedef uint8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
@@ -4920,12 +4920,12 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize)
copy_data(x, vector<input_c_type>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
// divide by scale        2  2  2  2  2  2  2  2  2  2  2  2
// equals (rounded)       0  0  1  2  2  2  3  4  4  4  5  6
// plus offset            1  1  1  1  1  1  1  1  1  1  1  1
// equals                 1  1  2  3  3  3  4  5  5  5  6  7
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 7}),
read_vector<output_c_type>(y));
}
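The expected values in this test changed because the round mode moved from ROUND_NEAREST_TOWARD_INFINITY to ROUND_NEAREST_TOWARD_EVEN; only the exact half-way quotients are affected. A short worked check for the inputs above divided by scale 2:

// input / 2              0  0.5  1  1.5  2  2.5  3  3.5  4  4.5  5  5.5
// round toward infinity  0   1   1   2   2   3   3   4   4   5   5   6    (old expectation)
// round toward even      0   0   1   2   2   2   3   4   4   4   5   6    (new expectation)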
@@ -4951,14 +4951,50 @@ NGRAPH_TEST(${BACKEND_NAME}, dequantize)
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 7});
// minus offset           1  1  1  1  1  1  1  1  1  1   1   1
// equals                 0  0  1  2  2  2  3  4  4  4   5   6
// multiplied by scale    2  2  2  2  2  2  2  2  2  2   2   2
// equals                 0  0  2  4  4  4  6  8  8  8  10  12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, 4, 4, 4, 6, 8, 8, 8, 10, 12}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_zero_offset)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::u8;
typedef float input_c_type;
typedef uint8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
// divide by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals (rounded) 0 0 1 2 2 2 3 4 4 4 5 6
// plus offset 0 0 0 0 0 0 0 0 0 0 0 0
// equals 0 0 1 2 2 2 3 4 4 4 5 6
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 6}),
read_vector<output_c_type>(y));
}
@@ -4984,13 +5020,14 @@ NGRAPH_TEST(${BACKEND_NAME}, dequantize_zero_offset)
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 6});
// minus offset           0  0  0  0  0  0  0  0  0  0   0   0
// equals                 0  0  1  2  2  2  3  4  4  4   5   6
// multiplied by scale    2  2  2  2  2  2  2  2  2  2   2   2
// equals                 0  0  2  4  4  4  6  8  8  8  10  12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, 4, 4, 4, 6, 8, 8, 8, 10, 12}),
read_vector<output_c_type>(y));
}
@@ -5075,7 +5112,7 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize_int8)
typedef float input_c_type;
typedef int8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
@@ -5090,12 +5127,12 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize_int8)
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
// divide by scale        2  2  2   2  2   2  2   2  2   2  2   2
// equals (rounded)       0  0  1  -2  2  -2  3  -4  4  -4  5  -6
// plus offset            1  1  1   1  1   1  1   1  1   1  1   1
// equals                 1  1  2  -1  3  -1  4  -3  5  -3  6  -5
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{1, 1, 2, -1, 3, -1, 4, -3, 5, -3, 6, -5}),
read_vector<output_c_type>(y));
}
@@ -5121,18 +5158,18 @@ NGRAPH_TEST(${BACKEND_NAME}, dequantize_int8)
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{1, 1, 2, -1, 3, -1, 4, -3, 5, -3, 6, -5});
// minus offset           1  1  1   1  1   1  1   1  1   1   1    1
// equals                 0  0  1  -2  2  -2  3  -4  4  -4   5   -6
// multiplied by scale    2  2  2   2  2   2  2   2  2   2   2    2
// equals                 0  0  2  -4  4  -4  6  -8  8  -8  10  -12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, -4, 4, -4, 6, -8, 8, -8, 10, -12}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_int8_zero_offset)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
@@ -5144,10 +5181,79 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp)
typedef float input_c_type;
typedef int8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
// divide by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals (rounded) 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// plus offset 0 0 0 0 0 0 0 0 0 0 0 0
// equals 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 1, -2, 2, -2, 3, -4, 4, -4, 5, -6}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, dequantize_int8_zero_offset)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::i8;
auto output_type = element::f32;
typedef int8_t input_c_type;
typedef float output_c_type;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(output_type, scale_offset_shape, {2});
auto offset = op::Constant::create(input_type, scale_offset_shape, {0});
auto dequantize = make_shared<op::Dequantize>(X, scale, offset, output_type, quantization_axes);
auto f = make_shared<Function>(dequantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, 0, 1, -2, 2, -2, 3, -4, 4, -4, 5, -6});
// minus offset 0 0 0 0 0 0 0 0 0 0 0 0
// equals 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals 0 0 2 -4 4 -4 6 -8 8 -8 10 -12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, -4, 4, -4, 6, -8, 8, -8, 10, -12}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_int32)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::i32;
typedef float input_c_type;
typedef int32_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
auto offset = op::Constant::create(output_type, scale_offset_shape, {1});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
@@ -5158,11 +5264,221 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp)
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
// divide by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals (rounded) 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// plus offset 1 1 1 1 1 1 1 1 1 1 1 1
// equals 1 1 2 -1 3 -1 4 -3 5 -3 6 -5
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{1, 1, 2, -1, 3, -1, 4, -3, 5, -3, 6, -5}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, dequantize_int32)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::i32;
auto output_type = element::f32;
typedef int32_t input_c_type;
typedef float output_c_type;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(output_type, scale_offset_shape, {2});
auto offset = op::Constant::create(input_type, scale_offset_shape, {1});
auto dequantize = make_shared<op::Dequantize>(X, scale, offset, output_type, quantization_axes);
auto f = make_shared<Function>(dequantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{1, 1, 2, -1, 3, -1, 4, -3, 5, -3, 6, -5});
// minus offset 1 1 1 1 1 1 1 1 1 1 1 1
// equals 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals 0 0 2 -4 4 -4 6 -8 8 -8 10 -12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, -4, 4, -4, 6, -8, 8, -8, 10, -12}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_int32_zero_offset)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::i32;
typedef float input_c_type;
typedef int32_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {2});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
// divide by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals (rounded) 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// plus offset 0 0 0 0 0 0 0 0 0 0 0 0
// equals 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 1, -2, 2, -2, 3, -4, 4, -4, 5, -6}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, dequantize_int32_zero_offset)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::i32;
auto output_type = element::f32;
typedef int32_t input_c_type;
typedef float output_c_type;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(output_type, scale_offset_shape, {2});
auto offset = op::Constant::create(input_type, scale_offset_shape, {0});
auto dequantize = make_shared<op::Dequantize>(X, scale, offset, output_type, quantization_axes);
auto f = make_shared<Function>(dequantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, 0, 1, -2, 2, -2, 3, -4, 4, -4, 5, -6});
// minus offset 0 0 0 0 0 0 0 0 0 0 0 0
// equals 0 0 1 -2 2 -2 3 -4 4 -4 5 -6
// multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2
// equals 0 0 2 -4 4 -4 6 -8 8 -8 10 -12
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, 0, 2, -4, 4, -4, 6, -8, 8, -8, 10, -12}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp_uint8)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::u8;
typedef float input_c_type;
typedef uint8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto max = std::numeric_limits<uint8_t>::max();
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {1.0 / (max + 1.0)});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, max, max, max, max, max, max, max, max, max, max, max}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp_int8)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::i8;
typedef float input_c_type;
typedef int8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto min = std::numeric_limits<int8_t>::min();
auto max = std::numeric_limits<int8_t>::max();
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {1.0 / (max + 1.0)});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, min, max, min, max, min, max, min, max, min, max, min}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp_int32)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f64;
auto output_type = element::i32;
// TODO: fails with f32 input due to limited 32-bit float precision
typedef double input_c_type;
typedef int32_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto min = std::numeric_limits<int32_t>::min();
auto max = std::numeric_limits<int32_t>::max();
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {1.0 / (max + 1.0)});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11});
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{0, min, max, min, max, min, max, min, max, min, max, min}),
read_vector<output_c_type>(y));
}
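As a quick arithmetic check on the clamp tests above: with scale = 1.0 / (max + 1), quantization computes round(x / scale) = round(x * (max + 1)), so every non-zero input lands outside the target type's range and is clamped, which is why the expected outputs are all min/max apart from the leading 0. For the int32 case:

// scale = 1 / 2^31, so x / scale = x * 2^31
// x =  0  ->            0  ->  0
// x = -1  ->  -2147483648  ->  int32 min (representable exactly, no clamping needed)
// x =  2  ->   4294967296  ->  clamped to int32 max (2147483647)
// x = -3  ->  -6442450944  ->  clamped to int32 min (-2147483648)

The f64 input type (and the TODO above) reflect that a 32-bit float's 24-bit mantissa cannot represent these products or the int32 bounds exactly, while f64 can.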
NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_ZERO)
@@ -5199,6 +5515,40 @@ NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_ZERO)
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_INFINITY)
{
Shape input_shape{4, 3};
Shape scale_offset_shape;
AxisSet quantization_axes;
auto input_type = element::f32;
auto output_type = element::i8;
typedef float input_c_type;
typedef int8_t output_c_type;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
auto X = make_shared<op::Parameter>(input_type, input_shape);
auto scale = op::Constant::create(input_type, scale_offset_shape, {4});
auto offset = op::Constant::create(output_type, scale_offset_shape, {0});
auto quantize =
make_shared<op::Quantize>(X, scale, offset, output_type, quantization_axes, round_mode);
auto f = make_shared<Function>(quantize, op::ParameterVector{X});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto x = backend->create_tensor(input_type, input_shape);
auto y = backend->create_tensor(output_type, input_shape);
copy_data(x, vector<input_c_type>{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15});
// divide by scale 4 4 4 4 4 4 4 4 4 4 4 4
// equals (rounded) 2 3 3 -2 -3 -3 3 4 4 -3 -4 -4
backend->call_with_validate(f, {y}, {x});
EXPECT_EQ((vector<output_c_type>{2, 3, 3, -2, -3, -3, 3, 4, 4, -3, -4, -4}),
read_vector<output_c_type>(y));
}
NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_UPWARD)
{
Shape input_shape{4, 3};
...