Commit 90edb4f6 authored by Nishant Patel's avatar Nishant Patel Committed by Scott Cyphers

Add support for QConvInteger (#2679)

parent 14b9bab2
......@@ -94,6 +94,26 @@ namespace ngraph
requantization_scale,
false);
}
// Builds a quantized integer convolution in the ConvInteger style:
// the node is constructed with requantize = false, so the result is
// left as the raw i32 accumulator rather than being scaled back to i8.
shared_ptr<Node> QuantizedConvInteger(shared_ptr<Node> input,
                                      shared_ptr<Node> filter,
                                      const Strides& window_movement_strides,
                                      const Strides& window_dilation_strides,
                                      const CoordinateDiff& padding_below,
                                      const CoordinateDiff& padding_above,
                                      const Strides& data_dilation_strides)
{
    // Identity output scale (scalar 1.0f) — no rescaling is applied.
    const auto unity_scale = make_constant(element::f32, Shape{}, 1);

    // false => do not requantize; keep the i32 output element type.
    const bool requantize = false;
    return make_shared<op::QuantizedConvolution>(input,
                                                 filter,
                                                 window_movement_strides,
                                                 window_dilation_strides,
                                                 padding_below,
                                                 padding_above,
                                                 data_dilation_strides,
                                                 unity_scale,
                                                 requantize);
}
}
}
}
......@@ -48,6 +48,14 @@ namespace ngraph
std::shared_ptr<Node> input_scale,
std::shared_ptr<Node> filter_scale,
std::shared_ptr<Node> output_scale);
/// \brief Builder for a quantized integer convolution.
///
/// Convolves a quantized \p input with a quantized \p filter and returns
/// the raw accumulator without requantization (the definition constructs
/// op::QuantizedConvolution with a unit output scale and requantize=false,
/// which selects an i32 output element type).
///
/// \param input                  Quantized data-batch node.
/// \param filter                 Quantized filter/weights node.
/// \param window_movement_strides Convolution strides.
/// \param window_dilation_strides Filter dilation.
/// \param padding_below          Padding added below/before each spatial axis.
/// \param padding_above          Padding added above/after each spatial axis.
/// \param data_dilation_strides  Data (input) dilation.
/// \return The constructed convolution node.
std::shared_ptr<Node> QuantizedConvInteger(std::shared_ptr<Node> input,
std::shared_ptr<Node> filter,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides);
}
}
}
......@@ -30,13 +30,15 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale)
const std::shared_ptr<Node> scale,
const bool requantize)
: Op("QuantizedConvolution", check_single_output_args({data_batch, filters, scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_requantize(requantize)
{
constructor_validate_and_infer_types();
......@@ -45,8 +47,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto output_et = requantize ? element::i8 : element::i32;
set_output_type(0,
element::i8,
output_et,
util::infer_convolution_output_shape(this,
data_batch_shape,
filters_shape,
......@@ -76,5 +80,6 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector&
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
new_args.at(2)));
new_args.at(2),
m_requantize));
}
......@@ -33,7 +33,8 @@ namespace ngraph
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale);
const std::shared_ptr<Node> scale,
const bool requantize = true);
// Accessors for the convolution geometry captured at construction time.
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
......@@ -41,6 +42,7 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
// Argument 1 holds the filter/weights node; argument 0 the data batch.
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
// True when the output is requantized to i8; false leaves the raw i32
// accumulator (see the output_et selection in the constructor).
bool requantize() const { return m_requantize; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -50,6 +52,7 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_requantize;
};
}
}
......@@ -22,6 +22,7 @@
#include "gtest/gtest.h"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/pass/constant_folding.hpp"
......@@ -187,6 +188,39 @@ TEST(builder, scaled_QC)
read_vector<int8_t>(result));
}
// Exercises the QuantizedConvInteger builder end to end on the CPU
// backend: u8 data convolved with i8 weights must produce the expected
// raw i32 accumulators (no requantization step).
TEST(builder, scaled_QConvInteger)
{
    Shape data_shape{1, 1, 3, 4};
    Shape weight_shape{1, 1, 3, 3};
    Shape out_shape{1, 1, 3, 4};

    vector<uint8_t> data_values = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
    vector<int8_t> weight_values = {1, 2, 3, 4, 5, 0, 0, 1, 2};

    auto data_param = make_shared<op::Parameter>(element::u8, data_shape);
    auto weight_param = make_shared<op::Parameter>(element::i8, weight_shape);

    auto conv = ngraph::builder::quantization::QuantizedConvInteger(
        data_param,
        weight_param,
        Strides{1, 1},        // window movement strides
        Strides{1, 1},        // window dilation strides
        CoordinateDiff{1, 1}, // padding below
        CoordinateDiff{1, 1}, // padding above
        Strides{1, 1});       // data dilation strides

    auto func =
        make_shared<Function>(NodeVector{conv}, ParameterVector{data_param, weight_param});
    constant_fold(func);

    auto backend = runtime::Backend::create("CPU");

    // Stage the inputs and an i32 result tensor on the backend.
    auto data_tensor = backend->create_tensor(element::u8, data_shape);
    copy_data(data_tensor, data_values);
    auto weight_tensor = backend->create_tensor(element::i8, weight_shape);
    copy_data(weight_tensor, weight_values);
    auto result = backend->create_tensor(element::i32, out_shape);

    auto handle = backend->compile(func);
    handle->call_with_validate({result}, {data_tensor, weight_tensor});
    EXPECT_EQ((vector<int32_t>{22, 34, 30, 32, 38, 72, 90, 43, 33, 52, 43, 39}),
              read_vector<int32_t>(result));
}
TEST(builder, dynamic_scaled_QC)
{
Shape shape_a{1, 1, 3, 4}; // input shape
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment