/******************************************************************************* * Copyright 2018 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ #include <algorithm> #include <cinttypes> #include <cmath> #include <cstdlib> #include <string> #include "gtest/gtest.h" #include "ngraph/ngraph.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/op/get_output_element.hpp" #include "ngraph/runtime/cpu/op/dequantize.hpp" #include "ngraph/runtime/cpu/op/quantize.hpp" #include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp" #include "ngraph/runtime/cpu/op/quantized_conv.hpp" #include "ngraph/runtime/cpu/op/quantized_max_pool.hpp" #include "util/all_close.hpp" #include "util/all_close_f.hpp" #include "util/ndarray.hpp" #include "util/random.hpp" #include "util/test_control.hpp" #include "util/test_tools.hpp" using namespace std; using namespace ngraph; TEST(quantize_cpu, quantize_max_pool_2d_unsigned) { vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; auto A = make_shared<op::Parameter>(element::u8, shape_a); Shape shape_r{1, 1, 2, 3}; auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto QMP = make_shared<op::QuantizedMaxPool>( A, window_shape, window_movement_strides, padding_below, padding_above, B, C); auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0); auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1); auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_r); auto result_min = backend->create_tensor(element::f32, Shape{1}); auto result_max = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<uint8_t>{3, 3, 2, 3, 3, 2}), read_vector<uint8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{255.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_max_pool_2d_signed) { vector<int8_t> a_data = {0, 1, 0, -2, 1, 0, -3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; auto A = make_shared<op::Parameter>(element::i8, shape_a); Shape shape_r{1, 1, 2, 3}; auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto QMP = make_shared<op::QuantizedMaxPool>( A, window_shape, window_movement_strides, padding_below, padding_above, B, C); auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0); auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1); auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::i8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_r); auto result_min = backend->create_tensor(element::f32, Shape{1}); auto result_max = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<int8_t>{2, 2, 2, 2, 2, 2}), read_vector<int8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{127.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_avg_pool_2d_unsigned) { vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; auto A = make_shared<op::Parameter>(element::u8, shape_a); Shape shape_r{1, 1, 2, 3}; auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto QMP = make_shared<op::QuantizedAvgPool>( A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C); auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0); auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1); auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_r); auto result_min = backend->create_tensor(element::f32, Shape{1}); auto result_max = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<uint8_t>{1, 1, 1, 1, 1, 0}), read_vector<uint8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{255.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_avg_pool_2d_signed) { vector<int8_t> a_data = {10, 1, 0, -2, 1, 0, -3, 4, 0, 0, 2, 0, 0, 0, 1}; Shape shape_a{1, 1, 3, 5}; Shape window_shape{2, 3}; auto window_movement_strides = Strides{1, 1}; Shape padding_below{0, 0}; Shape padding_above{0, 0}; auto A = make_shared<op::Parameter>(element::i8, shape_a); Shape shape_r{1, 1, 2, 3}; auto B = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto C = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto QMP = make_shared<op::QuantizedAvgPool>( A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C); auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0); auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1); auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::i8, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_r); auto result_min = backend->create_tensor(element::f32, Shape{1}); auto result_max = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<int8_t>{2, 0, 0, 0, 0, 1}), read_vector<int8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{127.0}), read_vector<float>(result_max)); } template <typename T> void DequantizeTest(int input, float min, float max, float expected_output) { vector<T> a_data = {static_cast<T>(input)}; Shape shape_a{1}; auto A = make_shared<op::Parameter>(element::from<T>(), shape_a); auto B = op::Constant::create(element::f32, Shape{}, {min}); auto C = op::Constant::create(element::f32, Shape{}, {max}); auto r = make_shared<op::Dequantize>(A, B, C, element::from<T>()); auto f = make_shared<Function>(r, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::from<T>(), Shape{1}); copy_data(a, a_data); auto result = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result}, {a}); EXPECT_EQ((vector<float>{expected_output}), read_vector<float>(result)); } TEST(quantize_cpu, dequantize_from_uint8) { DequantizeTest<uint8_t>(255, 100.0f, 300.0f, 300.0); } TEST(quantize_cpu, dequantize_from_uint8_smallrange) { DequantizeTest<uint8_t>(255, -2.0f, 5.0f, 5.0); } TEST(quantize_cpu, dequantize_from_int8_smallrange) { DequantizeTest<int8_t>(-127, -2.0f, 1.0f, -2.0); } TEST(quantize_cpu, dequantize_from_int8) { DequantizeTest<int8_t>(42, -1.0f, 300.0f, static_cast<float>(99.212601)); } TEST(quantize_cpu, quantizedConv2D_small) { Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_b{1, 1, 3, 3}; // filter shape Shape shape_r{1, 1, 3, 4}; // output shape vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2}; auto A = make_shared<op::Parameter>(element::u8, shape_a); auto B = make_shared<op::Parameter>(element::i8, shape_b); auto C = op::Constant::create(element::f32, Shape{1}, {0.0f}); auto D = op::Constant::create(element::f32, Shape{1}, {255.0f}); auto E = op::Constant::create(element::f32, Shape{1}, {-127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {22.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {90.0f}); auto CV = make_shared<op::QuantizedConvolution>(A, B, Strides{1, 1}, // move_strides Strides{1, 1}, // filter_dilation CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // above_pads Strides{1, 1}, // data_dilation C, D, E, F, G, H); auto output_data = std::make_shared<op::GetOutputElement>(CV, 0); auto output_min = std::make_shared<op::GetOutputElement>(CV, 1); auto output_max = std::make_shared<op::GetOutputElement>(CV, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A, B}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::u8, shape_a); copy_data(a, a_data); auto b = backend->create_tensor(element::i8, shape_b); copy_data(b, b_data); auto result = backend->create_tensor(element::i8, shape_r); auto result_min = backend->create_tensor(element::f32, Shape{1}); auto result_max = backend->create_tensor(element::f32, Shape{1}); backend->call_with_validate(f, {result, result_min, result_max}, {a, b}); EXPECT_EQ((vector<int8_t>{31, 48, 42, 45, 54, 102, 127, 61, 47, 74, 61, 55}), read_vector<int8_t>(result)); EXPECT_EQ((vector<float>{22.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{90.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_to_uint8_small) { vector<float> a_data = {-85.0, 0.0, 2.0, 10.0, 15.0}; Shape shape_a{5}; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = op::Constant::create(element::f32, Shape{}, {-85.0f}); auto C = op::Constant::create(element::f32, Shape{}, {15.0f}); auto QT = make_shared<op::Quantize>(A, B, C, element::u8); auto output_data = std::make_shared<op::GetOutputElement>(QT, 0); auto output_min = std::make_shared<op::GetOutputElement>(QT, 1); auto output_max = std::make_shared<op::GetOutputElement>(QT, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_a); auto result_min = backend->create_tensor(element::f32, Shape{}); auto result_max = backend->create_tensor(element::f32, Shape{}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<uint8_t>{0, 0, 6, 30, 45}), read_vector<uint8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{85.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_to_uint8) { vector<float> a_data = {-255.0, 0.0, 1.0, 1.25, 1.75, 64.0, 127.0, 500.0}; Shape shape_a{8}; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = op::Constant::create(element::f32, Shape{}, {-255.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto QT = make_shared<op::Quantize>(A, B, C, element::u8); auto output_data = std::make_shared<op::GetOutputElement>(QT, 0); auto output_min = std::make_shared<op::GetOutputElement>(QT, 1); auto output_max = std::make_shared<op::GetOutputElement>(QT, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::u8, shape_a); auto result_min = backend->create_tensor(element::f32, Shape{}); auto result_max = backend->create_tensor(element::f32, Shape{}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<uint8_t>{0, 0, 1, 1, 2, 64, 127, 255}), read_vector<uint8_t>(result)); EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{255.0}), read_vector<float>(result_max)); } TEST(quantize_cpu, quantize_to_int8) { vector<float> a_data = {-127.0, 0.0, 1.0, 3.0, 5.0, 64.0, 127.0, 500.0}; Shape shape_a{8}; auto A = make_shared<op::Parameter>(element::f32, shape_a); auto B = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto QT = make_shared<op::Quantize>(A, B, C, element::i8); auto output_data = std::make_shared<op::GetOutputElement>(QT, 0); auto output_min = std::make_shared<op::GetOutputElement>(QT, 1); auto output_max = std::make_shared<op::GetOutputElement>(QT, 2); auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max}, op::ParameterVector{A}); auto backend = runtime::Backend::create("CPU"); // Create some tensors for input/output auto a = backend->create_tensor(element::f32, shape_a); copy_data(a, a_data); auto result = backend->create_tensor(element::i8, shape_a); auto result_min = backend->create_tensor(element::f32, Shape{}); auto result_max = backend->create_tensor(element::f32, Shape{}); backend->call_with_validate(f, {result, result_min, result_max}, {a}); EXPECT_EQ((vector<int8_t>{-127, 0, 1, 3, 5, 64, 127, 127}), read_vector<int8_t>(result)); EXPECT_EQ((vector<float>{-127}), read_vector<float>(result_min)); EXPECT_EQ((vector<float>{127}), read_vector<float>(result_max)); }