//***************************************************************************** // Copyright 2017-2018 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** #include <algorithm> #include <cinttypes> #include <cmath> #include <cstdlib> #include <string> #include "gtest/gtest.h" #include "ngraph/builder/quantization.hpp" #include "ngraph/ngraph.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/pass/constant_folding.hpp" #include "ngraph/pass/manager.hpp" #include "util/all_close.hpp" #include "util/all_close_f.hpp" #include "util/ndarray.hpp" #include "util/random.hpp" #include "util/test_control.hpp" #include "util/test_tools.hpp" using namespace std; using namespace ngraph; TEST(builder, scaled_QMP_unsigned) { vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; Shape shape_r{1, 1, 2, 3}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto QMP = ngraph::builder::ScaledQuantizedMaxPool( A, window_shape, window_movement_strides, padding_below, padding_above, B, C); auto f = make_shared<Function>(NodeVector{QMP}, ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<uint8_t>{3, 3, 2, 3, 3, 2}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QMP_signed) { vector<int8_t> a_data = {0, 1, 0, -2, 1, 0, -3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; Shape shape_r{1, 1, 2, 3}; auto A = make_shared<op::Parameter>(element::i8, shape_a); auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto QMP = ngraph::builder::ScaledQuantizedMaxPool( A, window_shape, window_movement_strides, padding_below, padding_above, B, C); auto f = make_shared<Function>(NodeVector{QMP}, ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::i8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<int8_t>{2, 2, 2, 2, 2, 2}), read_vector<int8_t>(result)); } TEST(builder, scaled_QAP_unsigned) { vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; Shape shape_r{1, 1, 2, 3}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto QAP = ngraph::builder::ScaledQuantizedAvgPool( A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C); auto f = make_shared<Function>(NodeVector{QAP}, ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<uint8_t>{1, 1, 1, 1, 1, 0}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QAP_signed) { vector<int8_t> a_data = {10, 1, 0, -2, 1, 0, -3, 4, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; Shape shape_r{1, 1, 2, 3}; auto A = make_shared<op::Parameter>(element::i8, shape_a); auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto QAP = ngraph::builder::ScaledQuantizedAvgPool( A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C); auto f = make_shared<Function>(NodeVector{QAP}, ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::i8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<int8_t>{2, 0, 0, 0, 0, 1}), read_vector<int8_t>(result)); } static void constant_fold(std::shared_ptr<Function> f) { pass::Manager pass_manager; pass_manager.register_pass<pass::ConstantFolding>(); pass_manager.run_passes(f); } TEST(builder, scaled_QC) { Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 4}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto C = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{1}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {22.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {90.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolution(A, B, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto result = backend->create_tensor(element::i8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a, b}); EXPECT_EQ((vector<int8_t>{31, 48, 42, 45, 54, 102, 127, 61, 47, 74, 61, 55}), read_vector<int8_t>(result)); } TEST(builder, scaled_QC_with_relu) { Shape shape_a{1, 1, 3, 3}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 3}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; vector<int8_t> b_data = {1, 2, 1, 0, 0, 0, -1, -2, -1}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto C = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{1}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {20.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionRelu(A, B, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto result = backend->create_tensor(element::u8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a, b}); EXPECT_EQ((vector<uint8_t>{0, 0, 0, 0, 0, 0, 138, 212, 181}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QC_with_bias) { Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 4}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2}; vector<int32_t> c_data = {5}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto Bias = make_shared<op::Parameter>(element::i32, Shape{1}); auto C = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{1}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {22.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {90.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, B, Bias, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto c = backend->create_tensor(element::i32, Shape{1}); copy_data(c, c_data); auto result = backend->create_tensor(element::i8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a, b, c}); EXPECT_EQ((vector<int8_t>{38, 55, 50, 52, 61, 109, 127, 68, 54, 81, 68, 62}), read_vector<int8_t>(result)); } TEST(builder, scaled_QC_with_bias_and_relu) { Shape shape_a{1, 1, 3, 3}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 3}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; vector<int8_t> b_data = {1, 2, 1, 0, 0, 0, -1, -2, -1}; vector<int32_t> c_data = {5}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto Bias = make_shared<op::Parameter>(element::i32, Shape{1}); auto C = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{1}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {20.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, B, Bias, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H, true); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto c = backend->create_tensor(element::i32, Shape{1}); copy_data(c, c_data); auto result = backend->create_tensor(element::u8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a, b, c}); EXPECT_EQ((vector<uint8_t>{0, 0, 0, 0, 0, 0, 191, 255, 234}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QC_with_bias_add_and_relu) { Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 4}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2}; vector<int32_t> c_data = {5}; vector<uint8_t> conv_2_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto Add = make_shared<op::Parameter>(element::u8, shape_a); auto Bias = make_shared<op::Parameter>(element::i32, Shape{1}); auto C = op::Constant::create(element::f32, Shape{}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{}, {22.0f}); auto H = op::Constant::create(element::f32, Shape{}, {90.0f}); auto I = op::Constant::create(element::f32, Shape{}, {22.0f}); auto J = op::Constant::create(element::f32, Shape{}, {180.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A, B, Bias, Add, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H, I, J, true); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto c = backend->create_tensor(element::i32, Shape{1}); copy_data(c, c_data); auto d = backend->create_tensor(element::u8, shape_a); copy_data(d, conv_2_data); auto result = backend->create_tensor(element::u8, shape_r); auto handle = backend->compile(f); backend->call_with_validate(handle, {result}, {a, b, c, d}); EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QC_with_bias_signed_add_and_relu) { Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 4}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2}; vector<int32_t> c_data = {5}; vector<int8_t> conv_2_data = {-1, -2, -3, -4, -5, -6, -10, 0, 1, 2, 3, 4}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto Add = make_shared<op::Parameter>(element::i8, shape_a); auto Bias = make_shared<op::Parameter>(element::i32, Shape{1}); auto C = op::Constant::create(element::f32, Shape{}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{}, {22.0f}); auto H = op::Constant::create(element::f32, Shape{}, {90.0f}); auto I = op::Constant::create(element::f32, Shape{}, {22.0f}); auto J = op::Constant::create(element::f32, Shape{}, {90.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A, B, Bias, Add, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H, I, J, true); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto c = backend->create_tensor(element::i32, Shape{1}); copy_data(c, c_data); auto d = backend->create_tensor(element::i8, shape_a); copy_data(d, conv_2_data); auto result = backend->create_tensor(element::u8, shape_r); auto handle = backend->compile(f); backend->call_with_validate(handle, {result}, {a, b, c, d}); EXPECT_EQ((vector<uint8_t>{76, 110, 99, 105, 122, 218, 255, 136, 110, 165, 142, 133}), read_vector<uint8_t>(result)); } TEST(builder, scaled_QC_with_f32_bias_and_relu) { Shape shape_a{1, 1, 3, 3}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 3}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; vector<int8_t> b_data = {1, 2, 1, 0, 0, 0, -1, -2, -1}; vector<float> c_data = {5}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto Bias = make_shared<op::Parameter>(element::f32, Shape{1}); auto C = op::Constant::create(element::f32, Shape{}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{}, {20.0f}); auto H = op::Constant::create(element::f32, Shape{}, {-24.0f}); auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, B, Bias, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H, true); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto c = backend->create_tensor(element::f32, Shape{1}); copy_data(c, c_data); auto result = backend->create_tensor(element::u8, shape_r); backend->call_with_validate(backend->compile(f), {result}, {a, b, c}); EXPECT_EQ((vector<uint8_t>{0, 0, 0, 0, 0, 0, 191, 255, 234}), read_vector<uint8_t>(result)); } TEST(builder, scaled_Q_unsigned) { vector<float> a_data = {-255.0, 0.0, 1.0, 1.25, 1.75, 64.0, 127.0, 500.0}; Shape shape_a{8}; AxisSet quantization_axes; op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = op::Constant::create(element::f32, Shape{}, {-255.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::u8, quantization_axes, round_mode); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_a); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<uint8_t>{0, 0, 1, 1, 2, 64, 127, 255}), read_vector<uint8_t>(result)); } TEST(builder, dynamic_scaled_Q_unsigned) { vector<float> a_data = {-255.0, 0.0, 1.0, 1.25, 1.75, 64.0, 127.0, 500.0}; Shape shape_a{8}; AxisSet quantization_axes; op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = make_shared<op::Parameter>(element::f32, Shape{}); auto C = make_shared<op::Parameter>(element::f32, Shape{}); auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::u8, quantization_axes, round_mode); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A, B, C}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); auto b = backend->create_tensor(element::f32, Shape{}); auto c = backend->create_tensor(element::f32, Shape{}); copy_data(a, a_data); copy_data(b, vector<float>{-255.0f}); copy_data(c, vector<float>{127.0f}); auto result = backend->create_tensor(element::u8, shape_a); backend->call_with_validate(backend->compile(f), {result}, {a, b, c}); EXPECT_EQ((vector<uint8_t>{0, 0, 1, 1, 2, 64, 127, 255}), read_vector<uint8_t>(result)); } TEST(builder, scaled_Q_signed) { vector<float> a_data = {-127.0, 0.0, 1.0, 3.0, 5.0, 64.0, 127.0, 500.0}; Shape shape_a{8}; AxisSet quantization_axes; op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::i8, quantization_axes, round_mode); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_a); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<int8_t>{-127, 0, 1, 3, 5, 64, 127, 127}), read_vector<int8_t>(result)); } TEST(builder, scaled_DQ_signed) { vector<int8_t> a_data = {42}; AxisSet quantization_axes; auto A = make_shared<op::Parameter>(element::i8, Shape{1}); auto B = op::Constant::create(element::f32, Shape{}, {-1.0f}); auto C = op::Constant::create(element::f32, Shape{}, {300.0f}); auto r = ngraph::builder::ScaledDequantize(A, B, C, element::f32, quantization_axes); auto f = make_shared<Function>(r, ParameterVector{A}); constant_fold(f); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::i8, Shape{1}); copy_data(a, a_data); auto result = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(backend->compile(f), {result}, {a}); EXPECT_EQ((vector<float>{99.212601}), read_vector<float>(result)); }