[ONNX] Unit test models for QLinearConv (#2705)

* [ONNX] Unit test models for QLinearConv
* Temp ref conv
* ref conv temp
* add qlinearconv2d test
* adding conv3d test case
* ignore tests on GPU
* Dynamic scale
* add builder based solution for zero point and conv
* Revert "Dynamic scale"
  This reverts commit be8e57bdf7013967e5575164a0402dcc6d16b8ed.
* Revert "Merge remote-tracking branch 'origin/nishant_ref_conv_u8u8' into mkarzyns/qlinear_conv_uts"
  This reverts commit dea29a18c474b644b5b531f0e59f21d21bd56bf2, reversing changes made to b12fc13c5852efa4c335377164d9b7e5d9227a8a.
* style

[ONNX] Unit test models for QLinearConv (#2705)
* [ONNX] Unit test models for QLinearConv
* Temp ref conv
* ref conv temp
* add qlinearconv2d test
* adding conv3d test case
* ignore tests on GPU
* Dynamic scale
* add builder based solution for zero point and conv
* Revert "Dynamic scale"
  This reverts commit be8e57bdf7013967e5575164a0402dcc6d16b8ed.
* Revert "Merge remote-tracking branch 'origin/nishant_ref_conv_u8u8' into mkarzyns/qlinear_conv_uts"
  This reverts commit dea29a18c474b644b5b531f0e59f21d21bd56bf2, reversing changes made to b12fc13c5852efa4c335377164d9b7e5d9227a8a.
* style
9bfc0e5f · Michał Karzyński · Scott Cyphers · 76c73c91 · 9bfc0e5f · 9bfc0e5f
Commit 9bfc0e5f authored Apr 08, 2019 by Michał Karzyński Committed by Scott Cyphers Apr 08, 2019
25 changed files
--- a/src/ngraph/builder/quantization/quantized_linear_convolution.cpp
+++ b/src/ngraph/builder/quantization/quantized_linear_convolution.cpp
@@ -15,13 +15,18 @@
 //*****************************************************************************
 #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
+#include "ngraph/axis_set.hpp"
 #include "ngraph/builder/make_constant.hpp"
 #include "ngraph/builder/quantization.hpp"
 #include "ngraph/op/constant.hpp"
+#include "ngraph/op/convolution.hpp"
+#include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/experimental/quantized_conv.hpp"
 #include "ngraph/op/experimental/quantized_conv_bias.hpp"
 #include "ngraph/op/multiply.hpp"
+#include "ngraph/op/quantize.hpp"
+#include "ngraph/type/element_type.hpp"
 using namespace std;
 using namespace ngraph;
@@ -56,6 +61,48 @@ namespace ngraph
                                                             requantization_scale);
            }
+            shared_ptr<Node> QuantizedLinearConvolution(shared_ptr<Node> input,
+                                                        shared_ptr<Node> filter,
+                                                        const Strides& window_movement_strides,
+                                                        const Strides& window_dilation_strides,
+                                                        const CoordinateDiff& padding_below,
+                                                        const CoordinateDiff& padding_above,
+                                                        const Strides& data_dilation_strides,
+                                                        shared_ptr<Node> input_scale, // with input_zero_point: dequantizes input
+                                                        shared_ptr<Node> input_zero_point,
+                                                        shared_ptr<Node> filter_scale, // with filter_zero_point: dequantizes filter
+                                                        shared_ptr<Node> filter_zero_point,
+                                                        shared_ptr<Node> output_scale, // with output_zero_point: quantizes the result
+                                                        shared_ptr<Node> output_zero_point)
+            { // Builds Dequantize(input), Dequantize(filter) -> Convolution -> Quantize.
+                AxisSet axes; // default-constructed; passed to all three (de)quantize ops below
+                auto dq_input = make_shared<op::Dequantize>( // result type = input_scale's type
+                    input, input_scale, input_zero_point, input_scale->get_element_type(), axes);
+                auto dq_filter = make_shared<op::Dequantize>(filter, // result type = filter_scale's type
+                                                             filter_scale,
+                                                             filter_zero_point,
+                                                             filter_scale->get_element_type(),
+                                                             axes);
+                auto convolution = make_shared<op::Convolution>(dq_input, // plain Convolution op
+                                                                dq_filter,
+                                                                window_movement_strides,
+                                                                window_dilation_strides,
+                                                                padding_below,
+                                                                padding_above,
+                                                                data_dilation_strides);
+                auto q_convolution = // convolution result converted back to a quantized type
+                    make_shared<op::Quantize>(convolution,
+                                              output_scale,
+                                              output_zero_point,
+                                              output_zero_point->get_element_type(), // result type
+                                              axes,
+                                              op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
+                return q_convolution; // the Quantize node is the builder's result
+            }
            shared_ptr<Node> QuantizedLinearConvolutionBias(shared_ptr<Node> input,
                                                            shared_ptr<Node> filter,
                                                            shared_ptr<Node> bias,

--- a/src/ngraph/builder/quantization/quantized_linear_convolution.hpp
+++ b/src/ngraph/builder/quantization/quantized_linear_convolution.hpp
@@ -36,6 +36,21 @@ namespace ngraph
                                                             std::shared_ptr<Node> filter_scale,
                                                             std::shared_ptr<Node> output_scale);
+            std::shared_ptr<Node> // the .cpp definition returns its final Quantize node
+                QuantizedLinearConvolution(std::shared_ptr<Node> input,
+                                           std::shared_ptr<Node> filter,
+                                           const Strides& window_movement_strides,
+                                           const Strides& window_dilation_strides,
+                                           const CoordinateDiff& padding_below,
+                                           const CoordinateDiff& padding_above,
+                                           const Strides& data_dilation_strides,
+                                           std::shared_ptr<Node> input_scale, // scale/zero point for input
+                                           std::shared_ptr<Node> input_zero_point,
+                                           std::shared_ptr<Node> filter_scale, // scale/zero point for filter
+                                           std::shared_ptr<Node> filter_zero_point,
+                                           std::shared_ptr<Node> output_scale, // scale/zero point for output
+                                           std::shared_ptr<Node> output_zero_point);
            std::shared_ptr<Node>
                QuantizedLinearConvolutionBias(std::shared_ptr<Node> input,
                                               std::shared_ptr<Node> filter,

--- a/src/ngraph/frontend/onnx_import/op/quant_conv.cpp
+++ b/src/ngraph/frontend/onnx_import/op/quant_conv.cpp
@@ -159,9 +159,6 @@ namespace ngraph
                NodeVector quant_conv(const Node& node)
                {
-                    NGRAPH_WARN << "[" << node.get_name()
-                                << "] Zero point different from 0 is not supported. Assuming Zero "
-                                   "point is 0";
                    const NodeVector& inputs = node.get_ng_inputs();
                    auto data = inputs.at(0);
                    auto filters = inputs.at(3);
@@ -214,16 +211,36 @@ namespace ngraph
                    }
                    else
                    {
-                        conv_node =
+                        if (filters->get_element_type() == ngraph::element::u8 && groups == 1)
-                            make_ng_quant_conv(data,
+                        {
-                                               filters,
+                            conv_node = ngraph::builder::quantization::QuantizedLinearConvolution(
-                                               strides,
+                                data,
-                                               filter_dilations,
+                                filters,
-                                               padding_below,
+                                strides,
-                                               padding_above,
+                                filter_dilations,
-                                               data_dilations,
+                                padding_below,
-                                               groups,
+                                padding_above,
-                                               OpScale{data_scale, filters_scale, output_scale});
+                                data_dilations,
+                                data_scale,
+                                inputs.at(2),
+                                filters_scale,
+                                inputs.at(5),
+                                output_scale,
+                                inputs.at(7));
+                        }
+                        else
+                        {
+                            conv_node = make_ng_quant_conv(
+                                data,
+                                filters,
+                                strides,
+                                filter_dilations,
+                                padding_below,
+                                padding_above,
+                                data_dilations,
+                                groups,
+                                OpScale{data_scale, filters_scale, output_scale});
+                        }
                    }
                    return {conv_node};

--- a/src/ngraph/runtime/gpu/unit_test.manifest
+++ b/src/ngraph/runtime/gpu/unit_test.manifest
@@ -126,6 +126,8 @@ model_quantize_linear_zero_point
 quantize_linear_axis_zero
 model_quantize_linear_axis_negative
 model_quant_conv_linear
+model_quant_conv_linear_2d
+model_quant_conv_linear_3d
 # This should be implemented
 create_tensor_2_input

--- a/test/models/onnx/qlinear_conv_2d.prototxt
+++ b/test/models/onnx/qlinear_conv_2d.prototxt
+ir_version: 3
+producer_name: "ngraph ONNXImporter"
+graph {
+  node {
+    input: "x"
+    input: "x_scale"
+    input: "x_zero_point"
+    input: "w"
+    input: "w_scale"
+    input: "w_zero_point"
+    input: "y_scale"
+    input: "y_zero_point"
+    output: "y"
+    name: "node1"
+    op_type: "QLinearConv"
+    attribute {
+      name: "group"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "auto_pad"
+      s: "NOTSET"
+      type: STRING
+    }
+  }
+  name: "test"
+  input {
+    name: "x"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 7
+          }
+          dim {
+            dim_value: 7
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "x_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "x_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "w"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "w_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "w_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "y_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "y_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "y"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 7
+          }
+          dim {
+            dim_value: 7
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 10
+}
--- a/test/models/onnx/qlinear_conv_3d.prototxt
+++ b/test/models/onnx/qlinear_conv_3d.prototxt
+ir_version: 3
+producer_name: "ngraph ONNXImporter"
+graph {
+  node {
+    input: "x"
+    input: "x_scale"
+    input: "x_zero_point"
+    input: "w"
+    input: "w_scale"
+    input: "w_zero_point"
+    input: "y_scale"
+    input: "y_zero_point"
+    output: "y"
+    name: "node1"
+    op_type: "QLinearConv"
+    attribute {
+      name: "group"
+      i: 1
+      type: INT
+    }
+    attribute {
+      name: "auto_pad"
+      s: "NOTSET"
+      type: STRING
+    }
+    attribute {
+      name: "pads"
+      ints: 2
+      ints: 2
+      ints: 2
+      ints: 2
+      ints: 2
+      ints: 2
+      type: INTS
+    }
+    attribute {
+      name: "strides"
+      ints: 2
+      ints: 2
+      ints: 2
+      type: INTS
+    }
+  }
+  name: "test"
+  input {
+    name: "x"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "x_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "x_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "w"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+        }
+      }
+    }
+  }
+  input {
+    name: "w_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "w_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "y_scale"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+        }
+      }
+    }
+  }
+  input {
+    name: "y_zero_point"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+        }
+      }
+    }
+  }
+  output {
+    name: "y"
+    type {
+      tensor_type {
+        elem_type: 2
+        shape {
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 1
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 4
+          }
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 10
+}
--- a/test/models/onnx/qlinearconv2d/w.bin
+++ b/test/models/onnx/qlinearconv2d/w.bin
--- a/test/models/onnx/qlinearconv2d/w_scale.bin
+++ b/test/models/onnx/qlinearconv2d/w_scale.bin
+=|в:
\ No newline at end of file
--- a/test/models/onnx/qlinearconv2d/w_zero_point.bin
+++ b/test/models/onnx/qlinearconv2d/w_zero_point.bin
\ No newline at end of file
--- a/test/models/onnx/qlinearconv2d/x.bin
+++ b/test/models/onnx/qlinearconv2d/x.bin
--- a/test/models/onnx/qlinearconv2d/x_scale.bin
+++ b/test/models/onnx/qlinearconv2d/x_scale.bin
+--broken encoding: IBM424_ltr
\ No newline at end of file
--- a/test/models/onnx/qlinearconv2d/x_zero_point.bin
+++ b/test/models/onnx/qlinearconv2d/x_zero_point.bin
+„
\ No newline at end of file
--- a/test/models/onnx/qlinearconv2d/y.bin
+++ b/test/models/onnx/qlinearconv2d/y.bin
--- a/test/models/onnx/qlinearconv2d/y_scale.bin
+++ b/test/models/onnx/qlinearconv2d/y_scale.bin
+::
\ No newline at end of file
--- a/test/models/onnx/qlinearconv2d/y_zero_point.bin
+++ b/test/models/onnx/qlinearconv2d/y_zero_point.bin
+{
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/w.bin
+++ b/test/models/onnx/qlinearconv3d/w.bin
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/w_scale.bin
+++ b/test/models/onnx/qlinearconv3d/w_scale.bin
+:
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/w_zero_point.bin
+++ b/test/models/onnx/qlinearconv3d/w_zero_point.bin
--- a/test/models/onnx/qlinearconv3d/x.bin
+++ b/test/models/onnx/qlinearconv3d/x.bin
--- a/test/models/onnx/qlinearconv3d/x_scale.bin
+++ b/test/models/onnx/qlinearconv3d/x_scale.bin
+/;
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/x_zero_point.bin
+++ b/test/models/onnx/qlinearconv3d/x_zero_point.bin
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/y.bin
+++ b/test/models/onnx/qlinearconv3d/y.bin
--- a/test/models/onnx/qlinearconv3d/y_scale.bin
+++ b/test/models/onnx/qlinearconv3d/y_scale.bin
+2:
\ No newline at end of file
--- a/test/models/onnx/qlinearconv3d/y_zero_point.bin
+++ b/test/models/onnx/qlinearconv3d/y_zero_point.bin
+€
\ No newline at end of file
--- a/test/onnx_import.in.cpp
+++ b/test/onnx_import.in.cpp
@@ -2684,3 +2684,147 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear)
        execute<std::uint8_t, std::int8_t>(function, inputs, "${BACKEND_NAME}")};
    EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
 }
+NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear_2d)
+{ // Imports qlinear_conv_2d.prototxt, feeds 8 inputs read from .bin files, compares with y.bin.
+    auto function = onnx_import::import_onnx_model(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinear_conv_2d.prototxt"));
+    auto x = // data and zero points are read as uint8_t, scales as float
+        read_binary_file<uint8_t>(file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x.bin"));
+    auto x_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x_scale.bin"));
+    auto x_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/x_zero_point.bin"));
+    auto w =
+        read_binary_file<uint8_t>(file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w.bin"));
+    auto w_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w_scale.bin"));
+    auto w_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/w_zero_point.bin"));
+    auto y_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y_scale.bin"));
+    auto y_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y_zero_point.bin"));
+    auto backend = ngraph::runtime::Backend::create("${BACKEND_NAME}");
+    auto params = function->get_parameters(); // indexed 0..7 below
+    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> input_tensors;
+    input_tensors.push_back( // one backend tensor per parameter, matching its type and shape
+        backend->create_tensor(params.at(0)->get_element_type(), params.at(0)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(1)->get_element_type(), params.at(1)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(2)->get_element_type(), params.at(2)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(3)->get_element_type(), params.at(3)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(4)->get_element_type(), params.at(4)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(5)->get_element_type(), params.at(5)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(6)->get_element_type(), params.at(6)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(7)->get_element_type(), params.at(7)->get_shape()));
+    copy_data(input_tensors[0], x); // same order as the graph inputs in the .prototxt
+    copy_data(input_tensors[1], x_scale);
+    copy_data(input_tensors[2], x_zero_point);
+    copy_data(input_tensors[3], w);
+    copy_data(input_tensors[4], w_scale);
+    copy_data(input_tensors[5], w_zero_point);
+    copy_data(input_tensors[6], y_scale);
+    copy_data(input_tensors[7], y_zero_point);
+    auto results = function->get_results();
+    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> result_tensors;
+    result_tensors.push_back( // only the first result is allocated and checked
+        backend->create_tensor(results.at(0)->get_element_type(), results.at(0)->get_shape()));
+    auto handle = backend->compile(function);
+    handle->call_with_validate(result_tensors, input_tensors);
+    std::vector<std::vector<uint8_t>> outputs;
+    outputs.push_back(read_vector<uint8_t>(result_tensors[0]));
+    std::vector<std::vector<uint8_t>> expected_output;
+    expected_output.push_back(read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv2d/y.bin")));
+    EXPECT_EQ(expected_output.front(), outputs.front()); // exact equality, not all_close
+}
+NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear_3d)
+{ // Imports qlinear_conv_3d.prototxt, feeds 8 inputs read from .bin files, compares with y.bin.
+    auto function = onnx_import::import_onnx_model(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinear_conv_3d.prototxt"));
+    auto x = // data and zero points are read as uint8_t, scales as float
+        read_binary_file<uint8_t>(file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x.bin"));
+    auto x_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x_scale.bin"));
+    auto x_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/x_zero_point.bin"));
+    auto w =
+        read_binary_file<uint8_t>(file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w.bin"));
+    auto w_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w_scale.bin"));
+    auto w_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/w_zero_point.bin"));
+    auto y_scale = read_binary_file<float>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y_scale.bin"));
+    auto y_zero_point = read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y_zero_point.bin"));
+    auto backend = ngraph::runtime::Backend::create("${BACKEND_NAME}");
+    auto params = function->get_parameters(); // indexed 0..7 below
+    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> input_tensors;
+    input_tensors.push_back( // one backend tensor per parameter, matching its type and shape
+        backend->create_tensor(params.at(0)->get_element_type(), params.at(0)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(1)->get_element_type(), params.at(1)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(2)->get_element_type(), params.at(2)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(3)->get_element_type(), params.at(3)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(4)->get_element_type(), params.at(4)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(5)->get_element_type(), params.at(5)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(6)->get_element_type(), params.at(6)->get_shape()));
+    input_tensors.push_back(
+        backend->create_tensor(params.at(7)->get_element_type(), params.at(7)->get_shape()));
+    copy_data(input_tensors[0], x); // same order as the graph inputs in the .prototxt
+    copy_data(input_tensors[1], x_scale);
+    copy_data(input_tensors[2], x_zero_point);
+    copy_data(input_tensors[3], w);
+    copy_data(input_tensors[4], w_scale);
+    copy_data(input_tensors[5], w_zero_point);
+    copy_data(input_tensors[6], y_scale);
+    copy_data(input_tensors[7], y_zero_point);
+    auto results = function->get_results();
+    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> result_tensors;
+    result_tensors.push_back( // only the first result is allocated and checked
+        backend->create_tensor(results.at(0)->get_element_type(), results.at(0)->get_shape()));
+    auto handle = backend->compile(function);
+    handle->call_with_validate(result_tensors, input_tensors);
+    std::vector<std::vector<uint8_t>> outputs;
+    outputs.push_back(read_vector<uint8_t>(result_tensors[0]));
+    std::vector<std::vector<uint8_t>> expected_output;
+    expected_output.push_back(read_binary_file<uint8_t>(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/qlinearconv3d/y.bin")));
+    EXPECT_EQ(expected_output.front(), outputs.front()); // exact equality, not all_close
+}
\ No newline at end of file