Commit fa300fae authored by Michał Karzyński's avatar Michał Karzyński Committed by Scott Cyphers

[ONNX] Add ConvInteger op (#3012)

* Unit tests for ConvInteger

* Add ONNX ConvInteger op

* Add QuantizedConvInteger builder

* Add unit tests

* Exclude tests on nVidia GPU backend

* Fix merge artifact

* Add const-correctness and allow RVO
parent 073aedcd
......@@ -49,7 +49,7 @@ namespace ngraph
const shared_ptr<Node>& output_scale)
{
// TODO: need to establish cross-nGraph view of scale (mult or div)
auto requantization_scale = (input_scale * filter_scale) / output_scale;
const auto requantization_scale = (input_scale * filter_scale) / output_scale;
return make_shared<op::QuantizedConvolution>(input,
filter,
......@@ -79,32 +79,32 @@ namespace ngraph
const shared_ptr<Node>& output_scale,
const shared_ptr<Node>& output_zero_point)
{
AxisSet axes;
const AxisSet axes;
auto dq_input = make_shared<op::Dequantize>(
const auto dq_input = make_shared<op::Dequantize>(
input, input_scale, input_zero_point, input_scale->get_element_type(), axes);
auto dq_filter = make_shared<op::Dequantize>(filter,
filter_scale,
filter_zero_point,
filter_scale->get_element_type(),
axes);
auto convolution = make_shared<op::Convolution>(dq_input,
dq_filter,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides);
auto q_convolution =
make_shared<op::Quantize>(convolution,
output_scale,
output_zero_point,
output_zero_point->get_element_type(),
axes,
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
return move(q_convolution);
const auto dq_filter = make_shared<op::Dequantize>(filter,
filter_scale,
filter_zero_point,
filter_scale->get_element_type(),
axes);
const auto convolution = make_shared<op::Convolution>(dq_input,
dq_filter,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides);
// Return quantized convolution
return make_shared<op::Quantize>(
convolution,
output_scale,
output_zero_point,
output_zero_point->get_element_type(),
axes,
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
shared_ptr<Node> QuantizedLinearConvolutionBias(const shared_ptr<Node>& input,
......@@ -125,9 +125,9 @@ namespace ngraph
auto mybias = bias;
if (bias->get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, input_scale->get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale = input_scale * filter_scale;
const auto zero = make_constant(element::i32, input_scale->get_shape(), 0);
const AxisSet quantization_axes;
const auto bias_scale = input_scale * filter_scale;
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
......@@ -166,6 +166,69 @@ namespace ngraph
output_scale,
false);
}
shared_ptr<Node> QuantizedConvInteger(const shared_ptr<Node>& input,
                                      const shared_ptr<Node>& filter,
                                      const Strides& window_movement_strides,
                                      const Strides& window_dilation_strides,
                                      const CoordinateDiff& padding_below,
                                      const CoordinateDiff& padding_above,
                                      const Strides& data_dilation_strides,
                                      const std::shared_ptr<Node>& input_zero_point,
                                      const std::shared_ptr<Node>& filter_zero_point)
{
    // Fast path: zero points that are constants equal to zero have no effect,
    // so delegate to the overload that convolves the integer tensors directly.
    const bool both_zero_points_are_zero =
        ngraph::is_zero(input_zero_point) && ngraph::is_zero(filter_zero_point);
    if (both_zero_points_are_zero)
    {
        return QuantizedConvInteger(input,
                                    filter,
                                    window_movement_strides,
                                    window_dilation_strides,
                                    padding_below,
                                    padding_above,
                                    data_dilation_strides);
    }

    // Fall back to performing the operation on dequantized floating-point
    // values. All scales are 1, so the integer <-> float conversions are
    // driven by the zero points alone.
    const auto input_scale = make_constant(element::f32, Shape{}, 1);
    const auto filter_scale = make_constant(element::f32, Shape{}, 1);
    const auto output_scale = make_constant(element::f32, Shape{}, 1);
    const auto output_zero_point = make_constant(element::i32, Shape{}, 0);
    const AxisSet no_quantization_axes; // scalar scale / zero point

    const auto real_input = make_shared<op::Dequantize>(input,
                                                        input_scale,
                                                        input_zero_point,
                                                        input_scale->get_element_type(),
                                                        no_quantization_axes);
    const auto real_filter = make_shared<op::Dequantize>(filter,
                                                         filter_scale,
                                                         filter_zero_point,
                                                         filter_scale->get_element_type(),
                                                         no_quantization_axes);
    const auto real_output = make_shared<op::Convolution>(real_input,
                                                          real_filter,
                                                          window_movement_strides,
                                                          window_dilation_strides,
                                                          padding_below,
                                                          padding_above,
                                                          data_dilation_strides);

    // Quantize the floating-point convolution result back to integers.
    return make_shared<op::Quantize>(real_output,
                                     output_scale,
                                     output_zero_point,
                                     output_zero_point->get_element_type(),
                                     no_quantization_axes,
                                     op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
}
}
}
......@@ -72,6 +72,17 @@ namespace ngraph
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides);
std::shared_ptr<Node>
QuantizedConvInteger(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filter,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& input_zero_point,
const std::shared_ptr<Node>& filter_zero_point);
}
}
}
......@@ -70,6 +70,8 @@ add_library(onnx_import STATIC
op/constant.hpp
op/conv.cpp
op/conv.hpp
op/conv_integer.cpp
op/conv_integer.hpp
op/conv_transpose.cpp
op/conv_transpose.hpp
op/depth_to_space.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "op/conv_integer.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/frontend/onnx_import/exceptions.hpp"
#include "ngraph/frontend/onnx_import/utils/convpool.hpp"
using namespace ngraph::builder;
namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                NodeVector conv_integer(const Node& node)
                {
                    const NodeVector& inputs = node.get_ng_inputs();
                    const auto input_count = inputs.size();

                    const auto data = inputs.at(0);
                    const auto filters = inputs.at(1);

                    // Only the default (ungrouped) convolution is supported here.
                    const int64_t groups{node.get_attribute_value<int64_t>("group", 1)};
                    ASSERT_VALID_ARGUMENT(node, (groups == 1))
                        << "Only value of 1 for 'group' supported for ConvInteger. Given: "
                        << groups;

                    const auto window_movement_strides = convpool::get_strides(node);
                    const auto window_dilation_strides = convpool::get_dilations(node);
                    const auto paddings = convpool::get_pads(node);
                    const auto& padding_below = paddings.first;
                    const auto& padding_above = paddings.second;
                    // A stride of 1 per spatial dimension, i.e. no data dilation.
                    const Strides default_data_dilation_strides(data->get_shape().size() - 2,
                                                                1);

                    if (input_count == 2)
                    {
                        // No zero points were provided by the ONNX node.
                        return {quantization::QuantizedConvInteger(data,
                                                                   filters,
                                                                   window_movement_strides,
                                                                   window_dilation_strides,
                                                                   padding_below,
                                                                   padding_above,
                                                                   default_data_dilation_strides)};
                    }

                    const auto input_zero_point = inputs.at(2);
                    // The filters' zero point defaults to 0 unless a fourth input
                    // overrides it.
                    auto filters_zero_point =
                        make_constant(filters->get_element_type(), Shape{}, 0);
                    if (input_count == 4)
                    {
                        filters_zero_point = inputs.at(3);
                    }

                    return {quantization::QuantizedConvInteger(data,
                                                               filters,
                                                               window_movement_strides,
                                                               window_dilation_strides,
                                                               padding_below,
                                                               padding_above,
                                                               default_data_dilation_strides,
                                                               input_zero_point,
                                                               filters_zero_point)};
                }
            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "core/node.hpp"
#include "ngraph/node.hpp"
namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Performs the ONNX ConvInteger operation.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector containing nGraph nodes producing the output of the
                ///         quantized ONNX convolution operation.
                NodeVector conv_integer(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
......@@ -41,6 +41,7 @@
#include "op/concat.hpp"
#include "op/constant.hpp"
#include "op/conv.hpp"
#include "op/conv_integer.hpp"
#include "op/conv_transpose.hpp"
#include "op/cos.hpp"
#include "op/cosh.hpp"
......@@ -244,6 +245,7 @@ namespace ngraph
REGISTER_OPERATOR("Concat", 1, concat);
REGISTER_OPERATOR("Constant", 1, constant);
REGISTER_OPERATOR("Conv", 1, conv);
REGISTER_OPERATOR("ConvInteger", 1, conv_integer);
REGISTER_OPERATOR("ConvTranspose", 1, conv_transpose);
REGISTER_OPERATOR("Cos", 1, cos);
REGISTER_OPERATOR("Cosh", 1, cosh);
......
......@@ -133,6 +133,10 @@ model_quant_conv_linear_2d
model_quant_conv_linear_3d
model_qlinear_matmul
model_qlinear_matmul_3d
model_conv_integer_zero_point_zero
model_conv_integer_no_zero_point
model_conv_integer
model_conv_integer_pads
model_matmul_integer
model_matmul_integer_zero_point_zero
model_matmul_integer_no_zero_point
......
......@@ -2,8 +2,10 @@
model_quant_conv_linear
model_qlinear_matmul
model_qlinear_matmul_3d
model_conv_integer_no_zero_point
model_matmul_integer_no_zero_point
model_matmul_integer_4d_no_zero_point
fake_quantize
fake_quantize_with_clip
fake_quantize_with_clip_across_channels
ir_version: 5
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "x"
input: "w"
input: "x_zero_point"
output: "y"
name: "node1"
op_type: "ConvInteger"
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "auto_pad"
s: "NOTSET"
type: STRING
}
doc_string: "ConvInteger"
domain: ""
}
name: "test"
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 3
}
dim {
dim_value: 3
}
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
input {
name: "x_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 6
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
}
opset_import {
domain: ""
version: 10
}
ir_version: 5
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "x"
input: "w"
output: "y"
name: "node1"
op_type: "ConvInteger"
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "auto_pad"
s: "NOTSET"
type: STRING
}
doc_string: "ConvInteger"
domain: ""
}
name: "test"
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 3
}
dim {
dim_value: 3
}
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 6
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
}
opset_import {
domain: ""
version: 10
}
ir_version: 5
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "x"
input: "w"
input: "x_zero_point"
output: "y"
name: "node1"
op_type: "ConvInteger"
attribute {
name: "group"
i: 1
type: INT
}
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
attribute {
name: "auto_pad"
s: "NOTSET"
type: STRING
}
doc_string: "ConvInteger"
domain: ""
}
name: "test"
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 3
}
dim {
dim_value: 3
}
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
input {
name: "x_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 6
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 4
}
dim {
dim_value: 4
}
}
}
}
}
}
opset_import {
domain: ""
version: 10
}
......@@ -339,6 +339,63 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_qlinear_matmul_3d)
test_case.run();
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_conv_integer)
{
    // ConvInteger with x_zero_point = 1: every input element is offset by 1
    // before the 2x2 all-ones filter is applied.
    const auto model = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/conv_integer.prototxt"));

    auto test_case = ngraph::test::NgraphTestCase(model, "${BACKEND_NAME}");
    test_case.add_input(std::vector<uint8_t>{2, 3, 4, 5, 6, 7, 8, 9, 10}); // x
    test_case.add_input(std::vector<uint8_t>{1, 1, 1, 1});                 // w
    test_case.add_input(std::vector<uint8_t>{1});                          // x_zero_point
    test_case.add_expected_output({1, 1, 2, 2}, std::vector<uint8_t>{12, 16, 24, 28}); // y
    test_case.run();
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_conv_integer_zero_point_zero)
{
    // Same model as model_conv_integer, but a runtime zero point of 0 is
    // supplied, so the convolution sums the raw input values.
    const auto model = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/conv_integer.prototxt"));

    auto test_case = ngraph::test::NgraphTestCase(model, "${BACKEND_NAME}");
    test_case.add_input(std::vector<uint8_t>{1, 2, 3, 4, 5, 6, 7, 8, 9}); // x
    test_case.add_input(std::vector<uint8_t>{1, 1, 1, 1});                // w
    test_case.add_input(std::vector<uint8_t>{0});                         // x_zero_point
    test_case.add_expected_output({1, 1, 2, 2}, std::vector<uint8_t>{12, 16, 24, 28}); // y
    test_case.run();
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_conv_integer_no_zero_point)
{
    // Model variant whose ConvInteger node has only the x and w inputs; the
    // importer must default the missing zero points.
    const auto model = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/conv_integer_no_zero_point.prototxt"));

    auto test_case = ngraph::test::NgraphTestCase(model, "${BACKEND_NAME}");
    test_case.add_input(std::vector<uint8_t>{1, 2, 3, 4, 5, 6, 7, 8, 9}); // x
    test_case.add_input(std::vector<uint8_t>{1, 1, 1, 1});                // w
    test_case.add_expected_output({1, 1, 2, 2}, std::vector<uint8_t>{12, 16, 24, 28}); // y
    test_case.run();
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_conv_integer_pads)
{
    // ConvInteger with pads = {1,1,1,1} and x_zero_point = 1, producing a
    // 4x4 output from a 3x3 input.
    const auto model = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/conv_integer_pads.prototxt"));

    auto test_case = ngraph::test::NgraphTestCase(model, "${BACKEND_NAME}");
    test_case.add_input(std::vector<uint8_t>{2, 3, 4, 5, 6, 7, 8, 9, 10}); // x
    test_case.add_input(std::vector<uint8_t>{1, 1, 1, 1});                 // w
    test_case.add_input(std::vector<uint8_t>{1});                          // x_zero_point
    test_case.add_expected_output(
        {1, 1, 4, 4},
        std::vector<uint8_t>{1, 3, 5, 3, 5, 12, 16, 9, 11, 24, 28, 15, 7, 15, 17, 9}); // y
    test_case.run();
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_matmul_integer)
{
auto function = onnx_import::import_onnx_model(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment