Commit 9bfc0e5f, authored by Michał Karzyński and committed by Scott Cyphers

[ONNX] Unit test models for QLinearConv (#2705)

* [ONNX] Unit test models for QLinearConv

* Temp ref conv

* ref conv temp

* add qlinearconv2d test

* adding conv3d test case

* ignore tests on GPU

* Dynamic scale

* add builder based solution for zero point and conv

* Revert "Dynamic scale"

This reverts commit be8e57bdf7013967e5575164a0402dcc6d16b8ed.

* Revert "Merge remote-tracking branch 'origin/nishant_ref_conv_u8u8' into mkarzyns/qlinear_conv_uts"

This reverts commit dea29a18c474b644b5b531f0e59f21d21bd56bf2, reversing
changes made to b12fc13c5852efa4c335377164d9b7e5d9227a8a.

* style
parent 76c73c91
...@@ -15,13 +15,18 @@ ...@@ -15,13 +15,18 @@
//***************************************************************************** //*****************************************************************************
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp" #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp" #include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp" #include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/divide.hpp" #include "ngraph/op/divide.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp" #include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp" #include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/multiply.hpp" #include "ngraph/op/multiply.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/type/element_type.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -56,6 +61,48 @@ namespace ngraph ...@@ -56,6 +61,48 @@ namespace ngraph
requantization_scale); requantization_scale);
} }
// Builds a quantized linear convolution as a three-stage subgraph:
//   1. dequantize `input` and `filter` with their own scale / zero point,
//   2. run a regular op::Convolution on the dequantized values,
//   3. quantize the convolution result with the output scale / zero point.
//
// The dequantized element type is taken from the corresponding scale node and
// the final element type from `output_zero_point`. Every Quantize/Dequantize
// op receives an empty AxisSet, and the final quantization rounds with
// ROUND_NEAREST_TOWARD_EVEN.
//
// Returns the op::Quantize node that terminates the subgraph.
shared_ptr<Node> QuantizedLinearConvolution(shared_ptr<Node> input,
                                            shared_ptr<Node> filter,
                                            const Strides& window_movement_strides,
                                            const Strides& window_dilation_strides,
                                            const CoordinateDiff& padding_below,
                                            const CoordinateDiff& padding_above,
                                            const Strides& data_dilation_strides,
                                            shared_ptr<Node> input_scale,
                                            shared_ptr<Node> input_zero_point,
                                            shared_ptr<Node> filter_scale,
                                            shared_ptr<Node> filter_zero_point,
                                            shared_ptr<Node> output_scale,
                                            shared_ptr<Node> output_zero_point)
{
    // Shared by all three (de)quantization ops: no quantization axes.
    const AxisSet no_axes{};

    // Stage 1: bring both operands back to the element type of their scales.
    const auto real_input = make_shared<op::Dequantize>(
        input, input_scale, input_zero_point, input_scale->get_element_type(), no_axes);
    const auto real_filter = make_shared<op::Dequantize>(
        filter, filter_scale, filter_zero_point, filter_scale->get_element_type(), no_axes);

    // Stage 2: ordinary convolution on the dequantized operands.
    const auto real_convolution = make_shared<op::Convolution>(real_input,
                                                               real_filter,
                                                               window_movement_strides,
                                                               window_dilation_strides,
                                                               padding_below,
                                                               padding_above,
                                                               data_dilation_strides);

    // Stage 3: quantize the result into the element type of the output zero point.
    return make_shared<op::Quantize>(real_convolution,
                                     output_scale,
                                     output_zero_point,
                                     output_zero_point->get_element_type(),
                                     no_axes,
                                     op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
shared_ptr<Node> QuantizedLinearConvolutionBias(shared_ptr<Node> input, shared_ptr<Node> QuantizedLinearConvolutionBias(shared_ptr<Node> input,
shared_ptr<Node> filter, shared_ptr<Node> filter,
shared_ptr<Node> bias, shared_ptr<Node> bias,
......
...@@ -36,6 +36,21 @@ namespace ngraph ...@@ -36,6 +36,21 @@ namespace ngraph
std::shared_ptr<Node> filter_scale, std::shared_ptr<Node> filter_scale,
std::shared_ptr<Node> output_scale); std::shared_ptr<Node> output_scale);
/// \brief Builds a quantized linear convolution that honours zero points.
///
/// The implementation dequantizes \p input and \p filter, applies a regular
/// convolution to the dequantized values and quantizes the result again
/// (rounding to nearest, ties toward even).
///
/// \param input                   Quantized data tensor.
/// \param filter                  Quantized filter tensor.
/// \param window_movement_strides Forwarded unchanged to the convolution.
/// \param window_dilation_strides Forwarded unchanged to the convolution.
/// \param padding_below           Forwarded unchanged to the convolution.
/// \param padding_above           Forwarded unchanged to the convolution.
/// \param data_dilation_strides   Forwarded unchanged to the convolution.
/// \param input_scale             Scale used to dequantize \p input; its element
///                                type becomes the dequantized input type.
/// \param input_zero_point        Zero point used to dequantize \p input.
/// \param filter_scale            Scale used to dequantize \p filter; its element
///                                type becomes the dequantized filter type.
/// \param filter_zero_point       Zero point used to dequantize \p filter.
/// \param output_scale            Scale used to quantize the convolution result.
/// \param output_zero_point       Zero point used to quantize the result; its
///                                element type is the element type of the output.
///
/// \return The node producing the quantized convolution result.
std::shared_ptr<Node>
QuantizedLinearConvolution(std::shared_ptr<Node> input,
std::shared_ptr<Node> filter,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> input_scale,
std::shared_ptr<Node> input_zero_point,
std::shared_ptr<Node> filter_scale,
std::shared_ptr<Node> filter_zero_point,
std::shared_ptr<Node> output_scale,
std::shared_ptr<Node> output_zero_point);
std::shared_ptr<Node> std::shared_ptr<Node>
QuantizedLinearConvolutionBias(std::shared_ptr<Node> input, QuantizedLinearConvolutionBias(std::shared_ptr<Node> input,
std::shared_ptr<Node> filter, std::shared_ptr<Node> filter,
......
...@@ -159,9 +159,6 @@ namespace ngraph ...@@ -159,9 +159,6 @@ namespace ngraph
NodeVector quant_conv(const Node& node) NodeVector quant_conv(const Node& node)
{ {
NGRAPH_WARN << "[" << node.get_name()
<< "] Zero point different from 0 is not supported. Assuming Zero "
"point is 0";
const NodeVector& inputs = node.get_ng_inputs(); const NodeVector& inputs = node.get_ng_inputs();
auto data = inputs.at(0); auto data = inputs.at(0);
auto filters = inputs.at(3); auto filters = inputs.at(3);
...@@ -214,16 +211,36 @@ namespace ngraph ...@@ -214,16 +211,36 @@ namespace ngraph
} }
else else
{ {
conv_node = if (filters->get_element_type() == ngraph::element::u8 && groups == 1)
make_ng_quant_conv(data, {
filters, conv_node = ngraph::builder::quantization::QuantizedLinearConvolution(
strides, data,
filter_dilations, filters,
padding_below, strides,
padding_above, filter_dilations,
data_dilations, padding_below,
groups, padding_above,
OpScale{data_scale, filters_scale, output_scale}); data_dilations,
data_scale,
inputs.at(2),
filters_scale,
inputs.at(5),
output_scale,
inputs.at(7));
}
else
{
conv_node = make_ng_quant_conv(
data,
filters,
strides,
filter_dilations,
padding_below,
padding_above,
data_dilations,
groups,
OpScale{data_scale, filters_scale, output_scale});
}
} }
return {conv_node}; return {conv_node};
......
...@@ -126,6 +126,8 @@ model_quantize_linear_zero_point ...@@ -126,6 +126,8 @@ model_quantize_linear_zero_point
quantize_linear_axis_zero quantize_linear_axis_zero
model_quantize_linear_axis_negative model_quantize_linear_axis_negative
model_quant_conv_linear model_quant_conv_linear
model_quant_conv_linear_2d
model_quant_conv_linear_3d
# This should be implemented # This should be implemented
create_tensor_2_input create_tensor_2_input
......
ir_version: 3
producer_name: "ngraph ONNXImporter"
graph {
node {
input: "x"
input: "x_scale"
input: "x_zero_point"
input: "w"
input: "w_scale"
input: "w_zero_point"
input: "y_scale"
input: "y_zero_point"
output: "y"
name: "node1"
op_type: "QLinearConv"
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "auto_pad"
s: "NOTSET"
type: STRING
}
}
name: "test"
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 7
}
dim {
dim_value: 7
}
}
}
}
}
input {
name: "x_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "x_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 1
}
}
}
}
}
input {
name: "w_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "w_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 7
}
dim {
dim_value: 7
}
}
}
}
}
}
opset_import {
version: 10
}
ir_version: 3
producer_name: "ngraph ONNXImporter"
graph {
node {
input: "x"
input: "x_scale"
input: "x_zero_point"
input: "w"
input: "w_scale"
input: "w_zero_point"
input: "y_scale"
input: "y_zero_point"
output: "y"
name: "node1"
op_type: "QLinearConv"
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "auto_pad"
s: "NOTSET"
type: STRING
}
attribute {
name: "pads"
ints: 2
ints: 2
ints: 2
ints: 2
ints: 2
ints: 2
type: INTS
}
attribute {
name: "strides"
ints: 2
ints: 2
ints: 2
type: INTS
}
}
name: "test"
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 4
}
dim {
dim_value: 4
}
dim {
dim_value: 4
}
}
}
}
}
input {
name: "x_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "x_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 1
}
}
}
}
}
input {
name: "w_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "w_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 4
}
dim {
dim_value: 4
}
dim {
dim_value: 4
}
}
}
}
}
}
opset_import {
version: 10
}
=|в:
\ No newline at end of file
--broken encoding: IBM424_ltr
\ No newline at end of file
::
\ No newline at end of file
\ No newline at end of file
/;
\ No newline at end of file
2:
\ No newline at end of file
...@@ -2684,3 +2684,147 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear) ...@@ -2684,3 +2684,147 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear)
execute<std::uint8_t, std::int8_t>(function, inputs, "${BACKEND_NAME}")}; execute<std::uint8_t, std::int8_t>(function, inputs, "${BACKEND_NAME}")};
EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front())); EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
} }
// Imports the 2D QLinearConv model, feeds it the eight reference inputs stored
// under onnx/qlinearconv2d/ and checks the uint8 output against y.bin exactly.
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear_2d)
{
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinear_conv_2d.prototxt"));

    // Reference inputs, in the order of the QLinearConv node inputs.
    auto x = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x.bin"));
    auto x_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x_scale.bin"));
    auto x_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x_zero_point.bin"));
    auto w = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w.bin"));
    auto w_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w_scale.bin"));
    auto w_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w_zero_point.bin"));
    auto y_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y_scale.bin"));
    auto y_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y_zero_point.bin"));

    auto backend = ngraph::runtime::Backend::create("${BACKEND_NAME}");

    // One backend tensor per model parameter; at() keeps the bounds check.
    auto parameters = function->get_parameters();
    constexpr std::size_t input_count = 8;
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> input_tensors;
    for (std::size_t idx = 0; idx < input_count; ++idx)
    {
        input_tensors.push_back(backend->create_tensor(parameters.at(idx)->get_element_type(),
                                                       parameters.at(idx)->get_shape()));
    }

    copy_data(input_tensors[0], x);
    copy_data(input_tensors[1], x_scale);
    copy_data(input_tensors[2], x_zero_point);
    copy_data(input_tensors[3], w);
    copy_data(input_tensors[4], w_scale);
    copy_data(input_tensors[5], w_zero_point);
    copy_data(input_tensors[6], y_scale);
    copy_data(input_tensors[7], y_zero_point);

    auto graph_results = function->get_results();
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> result_tensors{backend->create_tensor(
        graph_results.at(0)->get_element_type(), graph_results.at(0)->get_shape())};

    auto executable = backend->compile(function);
    executable->call_with_validate(result_tensors, input_tensors);

    // Quantized outputs must match the reference bit for bit.
    const std::vector<std::vector<uint8_t>> outputs{read_vector<uint8_t>(result_tensors[0])};
    const std::vector<std::vector<uint8_t>> expected_output{read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y.bin"))};
    EXPECT_EQ(expected_output.front(), outputs.front());
}
// Imports the 3D QLinearConv model, feeds it the eight reference inputs stored
// under onnx/qlinearconv3d/ and checks the uint8 output against y.bin exactly.
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear_3d)
{
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinear_conv_3d.prototxt"));

    // Reference inputs, in the order of the QLinearConv node inputs.
    auto x = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x.bin"));
    auto x_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x_scale.bin"));
    auto x_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x_zero_point.bin"));
    auto w = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w.bin"));
    auto w_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w_scale.bin"));
    auto w_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w_zero_point.bin"));
    auto y_scale = read_binary_file<float>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y_scale.bin"));
    auto y_zero_point = read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y_zero_point.bin"));

    auto backend = ngraph::runtime::Backend::create("${BACKEND_NAME}");

    // One backend tensor per model parameter; at() keeps the bounds check.
    auto parameters = function->get_parameters();
    constexpr std::size_t input_count = 8;
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> input_tensors;
    for (std::size_t idx = 0; idx < input_count; ++idx)
    {
        input_tensors.push_back(backend->create_tensor(parameters.at(idx)->get_element_type(),
                                                       parameters.at(idx)->get_shape()));
    }

    copy_data(input_tensors[0], x);
    copy_data(input_tensors[1], x_scale);
    copy_data(input_tensors[2], x_zero_point);
    copy_data(input_tensors[3], w);
    copy_data(input_tensors[4], w_scale);
    copy_data(input_tensors[5], w_zero_point);
    copy_data(input_tensors[6], y_scale);
    copy_data(input_tensors[7], y_zero_point);

    auto graph_results = function->get_results();
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> result_tensors{backend->create_tensor(
        graph_results.at(0)->get_element_type(), graph_results.at(0)->get_shape())};

    auto executable = backend->compile(function);
    executable->call_with_validate(result_tensors, input_tensors);

    // Quantized outputs must match the reference bit for bit.
    const std::vector<std::vector<uint8_t>> outputs{read_vector<uint8_t>(result_tensors[0])};
    const std::vector<std::vector<uint8_t>> expected_output{read_binary_file<uint8_t>(
        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y.bin"))};
    EXPECT_EQ(expected_output.front(), outputs.front());
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment