Commit ee1a5ca8 authored by tsocha, committed by Scott Cyphers

[ONNX] Quantization operators (#2495)

* Lin quant ONNX operators

* Add EOL at EOF

* Update licence year

* Cleanup namespaces

* Add missing headers

* Check if group is correct

* Remove Check in dequantize

* Remove TODO

* Add converting zero point to data element type

* Remove unused scale variable

* Removing zero point

* Remove make_ng_conv_bias helper function

* Add test for Quantize Linear

* Remove dead code

* Add test for Dequantize Linear

* Add Quant Conv test

* Convert models to prototxt

* Remove artifact test

* Skip test on INTERPRETER

* Style check

* Remove stupid skipping

* Enable test skipping in onnx_backend tests

* Skip GPU tests on quantized operators

* Review fix
parent 6ca8ba97
@@ -74,6 +74,8 @@ add_library(onnx_import STATIC
op/conv_transpose.hpp
op/depth_to_space.cpp
op/depth_to_space.hpp
op/dequantize_linear.cpp
op/dequantize_linear.hpp
op/div.hpp
op/dropout.hpp
op/elu.cpp
@@ -123,6 +125,10 @@ add_library(onnx_import STATIC
op/pow.hpp
op/prelu.cpp
op/prelu.hpp
op/quant_conv.cpp
op/quant_conv.hpp
op/quantize_linear.cpp
op/quantize_linear.hpp
op/reciprocal.cpp
op/reciprocal.hpp
op/reduce.cpp
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include "exceptions.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/shape.hpp"
#include "quantize_linear.hpp"
#include "utils/common.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                NodeVector dequantize_linear(const Node& node)
                {
                    NodeVector inputs{node.get_ng_inputs()};
                    std::shared_ptr<ngraph::Node> x = inputs.at(0);
                    std::shared_ptr<ngraph::Node> x_scale = inputs.at(1);
                    std::shared_ptr<ngraph::Node> zero_point;
                    // The zero point input is optional; default to a scalar zero.
                    if (inputs.size() == 3 && !inputs.at(2)->is_null())
                    {
                        zero_point = inputs.at(2);
                    }
                    else
                    {
                        zero_point = common::make_constant_node(
                            x->get_element_type(), Shape{}, std::vector<std::uint8_t>{0});
                    }

                    Shape x_scale_shape = x_scale->get_shape();
                    Shape zero_point_shape = zero_point->get_shape();

                    ASSERT_VALID_ARGUMENT(node, x_scale_shape.size() == 0)
                        << "x_scale must be a scalar.";
                    ASSERT_VALID_ARGUMENT(node, zero_point_shape.size() == 0)
                        << "zero_point must be a scalar.";

                    if (x->get_element_type() != zero_point->get_element_type())
                    {
                        zero_point = std::make_shared<ngraph::op::Convert>(
                            zero_point, x->get_element_type());
                    }

                    // Dequantize computes y = (x - zero_point) * x_scale elementwise,
                    // producing values of x_scale's element type.
                    return {std::make_shared<ngraph::op::Dequantize>(
                        x, x_scale, zero_point, x_scale->get_element_type(), AxisSet{})};
                }

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "core/node.hpp"
#include "ngraph/node_vector.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Performs ONNX DequantizeLinear operation.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector of nGraph nodes producing the dequantized output.
                NodeVector dequantize_linear(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "core/node.hpp"
#include "ngraph/node_vector.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Performs ONNX QLinearConv (quantized convolution) operation.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector of nGraph nodes producing the output of the ONNX
                ///         quantized convolution operation.
                NodeVector quant_conv(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include "exceptions.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/shape.hpp"
#include "quantize_linear.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                NodeVector quantize_linear(const Node& node)
                {
                    NodeVector inputs{node.get_ng_inputs()};
                    std::shared_ptr<ngraph::Node> x = inputs.at(0);
                    std::shared_ptr<ngraph::Node> y_scale = inputs.at(1);
                    std::shared_ptr<ngraph::Node> y_zero_point = inputs.at(2);

                    Shape y_scale_shape = y_scale->get_shape();
                    Shape y_zero_point_shape = y_zero_point->get_shape();

                    ASSERT_VALID_ARGUMENT(node, y_scale_shape.size() == 0)
                        << "y_scale must be a scalar.";
                    ASSERT_VALID_ARGUMENT(node, y_zero_point_shape.size() == 0)
                        << "y_zero_point must be a scalar.";

                    // Quantize computes y = round(x / y_scale) + y_zero_point, saturated to
                    // the zero point's element type, with ties rounded toward the nearest
                    // even value.
                    return {std::make_shared<ngraph::op::Quantize>(
                        x,
                        y_scale,
                        y_zero_point,
                        y_zero_point->get_element_type(),
                        AxisSet{},
                        ngraph::op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN)};
                }

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "core/node.hpp"
#include "ngraph/node_vector.hpp"
namespace ngraph
{
namespace onnx_import
{
namespace op
{
namespace set_1
{
NodeVector quantize_linear(const Node& node);
} // namespace set_1
} //namespace op
} // namespace onnx_import
} // namespace ngraph
@@ -45,6 +45,7 @@
#include "op/cos.hpp"
#include "op/cosh.hpp"
#include "op/depth_to_space.hpp"
#include "op/dequantize_linear.hpp"
#include "op/div.hpp"
#include "op/dropout.hpp"
#include "op/elu.hpp"
@@ -79,6 +80,8 @@
#include "op/pad.hpp"
#include "op/pow.hpp"
#include "op/prelu.hpp"
#include "op/quant_conv.hpp"
#include "op/quantize_linear.hpp"
#include "op/reciprocal.hpp"
#include "op/reduce.hpp"
#include "op/relu.hpp"
@@ -240,6 +243,7 @@ namespace ngraph
REGISTER_OPERATOR("Cos", 1, cos);
REGISTER_OPERATOR("Cosh", 1, cosh);
REGISTER_OPERATOR("DepthToSpace", 1, depth_to_space);
REGISTER_OPERATOR("DequantizeLinear", 1, dequantize_linear);
REGISTER_OPERATOR("Div", 1, div);
REGISTER_OPERATOR("Div", 7, div);
REGISTER_OPERATOR("Dropout", 1, dropout);
@@ -278,6 +282,8 @@ namespace ngraph
REGISTER_OPERATOR("Pad", 1, pad);
REGISTER_OPERATOR("Pow", 1, pow);
REGISTER_OPERATOR("PRelu", 1, prelu);
REGISTER_OPERATOR("QLinearConv", 1, quant_conv);
REGISTER_OPERATOR("QuantizeLinear", 1, quantize_linear);
REGISTER_OPERATOR("Reciprocal", 1, reciprocal);
REGISTER_OPERATOR("ReduceLogSum", 1, reduce_log_sum);
REGISTER_OPERATOR("ReduceLogSumExp", 1, reduce_log_sum_exp);
@@ -96,3 +96,8 @@ all_2x2x3_eliminate_dims_0_1_2
floor_int32
divide_int32
one_hot_scalar_oob_in_3
# Quantized operators are not supported on gpu backend
model_dequantize_linear
model_quantize_linear
model_quant_conv_linear
# Quantized convolution is not supported on interpreter
model_quant_conv_linear
ir_version: 3
producer_name: "ngraph ONNXImporter"
graph {
node {
input: "X"
input: "x_scale"
input: "zero_point"
output: "Y"
name: "DequantizeLinear"
op_type: "DequantizeLinear"
}
name: "test_graph"
initializer {
data_type: 2
name: "zero_point"
raw_data: "\000"
}
initializer {
data_type: 1
float_data: 4
name: "x_scale"
}
input {
name: "X"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
input {
name: "x_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "Y"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
}
opset_import {
version: 4
}
ir_version: 3
producer_name: "backend-test"
graph {
node {
input: "x"
input: "x_scale"
input: "x_zero_point"
input: "w"
input: "w_scale"
input: "w_zero_point"
input: "y_scale"
input: "y_zero_point"
output: "y"
op_type: "QLinearConv"
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
}
name: "test_conv_with_strides_padding"
initializer {
data_type: 2
name: "x_zero_point"
raw_data: "\000"
}
initializer {
data_type: 1
float_data: 1
name: "x_scale"
}
initializer {
data_type: 2
name: "w_zero_point"
raw_data: "\000"
}
initializer {
data_type: 1
float_data: 1
name: "w_scale"
}
initializer {
data_type: 2
name: "y_zero_point"
raw_data: "\000"
}
initializer {
data_type: 1
float_data: 16
name: "y_scale"
}
initializer {
dims: 1
dims: 1
dims: 3
dims: 3
data_type: 3
int32_data: 1
int32_data: 0
int32_data: 2
int32_data: 1
int32_data: 0
int32_data: 2
int32_data: 1
int32_data: 0
int32_data: 2
name: "w"
}
input {
name: "x"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 9
}
dim {
dim_value: 9
}
}
}
}
}
input {
name: "x_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "x_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "w"
type {
tensor_type {
elem_type: 3
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 3
}
dim {
dim_value: 3
}
}
}
}
}
input {
name: "w_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "w_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
input {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 1
}
dim {
dim_value: 1
}
dim {
dim_value: 9
}
dim {
dim_value: 9
}
}
}
}
}
}
opset_import {
version: 8
}
ir_version: 3
producer_name: "ngraph ONNXImporter"
graph {
node {
input: "X"
input: "y_scale"
input: "y_zero_point"
output: "Y"
name: "QuantizeLinear"
op_type: "QuantizeLinear"
}
name: "test_graph"
initializer {
data_type: 2
name: "y_zero_point"
raw_data: "\000"
}
input {
name: "X"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
input {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
input {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
output {
name: "Y"
type {
tensor_type {
elem_type: 2
shape {
dim {
dim_value: 2
}
dim {
dim_value: 2
}
}
}
}
}
}
opset_import {
version: 4
}
@@ -2226,3 +2226,57 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, import_non_existing_file)
EXPECT_TRUE(msg.find("i.dont.exist") != std::string::npos);
}
}
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quantize_linear)
{
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/quant_lin.prototxt"));

    Inputs inputs;
    inputs.emplace_back(std::vector<float>{32.25f, 48.34f, 50.f, 83.f});
    inputs.emplace_back(std::vector<float>{0.5f});

    std::vector<std::vector<std::uint8_t>> expected_output{
        std::vector<std::uint8_t>{64, 97, 100, 166}};

    std::vector<std::vector<std::uint8_t>> outputs{
        execute<float, std::uint8_t>(function, inputs, "${BACKEND_NAME}")};
    EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
}
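
// Worked check for model_quantize_linear above, assuming the ONNX QuantizeLinear
// definition y = saturate(round(x / y_scale) + y_zero_point) with ties rounded to even,
// y_scale = 0.5 (the second test input) and y_zero_point = 0 (from quant_lin.prototxt):
//   32.25 / 0.5 = 64.5  -> 64 (tie rounds to even)
//   48.34 / 0.5 = 96.68 -> 97
//   50.00 / 0.5 = 100.0 -> 100
//   83.00 / 0.5 = 166.0 -> 166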
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_dequantize_linear)
{
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/dequant_lin.prototxt"));

    std::vector<std::vector<std::uint8_t>> inputs;
    inputs.emplace_back(std::vector<std::uint8_t>{19, 210, 21, 10});

    Outputs expected_output{std::vector<float>{76.f, 840.f, 84.f, 40.f}};
    Outputs outputs{execute<std::uint8_t, float>(function, inputs, "${BACKEND_NAME}")};
    EXPECT_TRUE(test::all_close_f(expected_output.front(), outputs.front()));
}
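
// Worked check for model_dequantize_linear above, assuming the ONNX DequantizeLinear
// definition y = (x - zero_point) * x_scale with x_scale = 4 and zero_point = 0 (both
// stored in dequant_lin.prototxt): 19 * 4 = 76, 210 * 4 = 840, 21 * 4 = 84, 10 * 4 = 40.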
NGRAPH_TEST(onnx_${BACKEND_NAME}, model_quant_conv_linear)
{
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/quant_conv_lin.prototxt"));

    std::vector<std::vector<std::uint8_t>> inputs;
    inputs.emplace_back(std::vector<std::uint8_t>{
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81});

    std::vector<std::vector<std::int8_t>> expected_output{std::vector<std::int8_t>{
        2, 3, 3, 3, 4, 4, 4, 5, 2, 4, 6, 7, 8, 8, 9, 9, 10, 3, 8, 11, 12,
        13, 13, 14, 14, 15, 5, 11, 16, 17, 18, 18, 19, 19, 20, 7, 14, 22, 22, 23, 23, 24,
        24, 25, 8, 18, 27, 27, 28, 28, 29, 29, 30, 10, 21, 32, 32, 33, 33, 34, 34, 35, 12,
        24, 37, 37, 38, 38, 39, 40, 40, 13, 17, 26, 27, 27, 27, 28, 28, 28, 9}};

    std::vector<std::vector<std::int8_t>> outputs{
        execute<std::uint8_t, std::int8_t>(function, inputs, "${BACKEND_NAME}")};
    EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
}
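
// Worked check of the first expected element of model_quant_conv_linear above, assuming
// the usual QLinearConv semantics y = saturate(round(conv(x, w) * x_scale * w_scale / y_scale)
// + y_zero_point) with all zero points 0, x_scale = w_scale = 1 and y_scale = 16
// (quant_conv_lin.prototxt). With pads {1, 1, 1, 1} the top-left 3x3 window of the 9x9 ramp
// input is {{0, 0, 0}, {0, 1, 2}, {0, 10, 11}}; correlating it with the kernel
// {{1, 0, 2}, {1, 0, 2}, {1, 0, 2}} gives 2 * 2 + 11 * 2 = 26, and round(26 / 16) = 2,
// which matches the first expected value.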