Commit f9d0bd57 authored by Nishant Patel's avatar Nishant Patel Committed by Scott Cyphers

Add support for QDotInteger (#2687)

* Add support for QDotInteger

* Address Feedback

* Support int32 as the output of QuantizedDot without requantization, similar to QuantizedConvolution
parent 7775d49d
......@@ -31,6 +31,8 @@ set (SRC
builder/quantization.hpp
builder/quantization/quantized_linear_convolution.cpp
builder/quantization/quantized_linear_convolution.hpp
builder/quantization/quantized_linear_dot.cpp
builder/quantization/quantized_linear_dot.hpp
builder/quantization_util.hpp
builder/reduce_ops.cpp
builder/reduce_ops.hpp
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/builder/quantization/quantized_linear_dot.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/experimental/quantized_dot.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        namespace quantization
        {
            // Wraps two quantized tensors (u8/i8 input, i8 filter) in a
            // QuantizedDot node whose product is left un-requantized, so the
            // output element type is the accumulator type (i32).
            shared_ptr<Node> QuantizedDotInteger(shared_ptr<Node> input, shared_ptr<Node> filter)
            {
                // A scalar scale of 1 means the raw integer accumulation is
                // returned as-is: requantize = false, with_relu = false.
                const auto unit_scale = make_constant(element::f32, Shape{}, 1);
                return make_shared<op::QuantizedDot>(input,
                                                     filter,
                                                     unit_scale,
                                                     /*requantize=*/false,
                                                     /*with_relu=*/false);
            }
        }
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
namespace ngraph
{
namespace builder
{
namespace quantization
{
/// \brief Build a QuantizedDot over a quantized input and filter with no
///        requantization (scalar output scale of 1), so the node yields the
///        raw integer accumulation.
/// \param input  Quantized input tensor node.
/// \param filter Quantized filter (weights) tensor node.
/// \return Shared pointer to the constructed QuantizedDot node.
std::shared_ptr<Node> QuantizedDotInteger(std::shared_ptr<Node> input,
std::shared_ptr<Node> filter);
}
}
}
......@@ -37,6 +37,7 @@ op::QuantizedDot::QuantizedDot(const shared_ptr<Node>& data,
auto& data_shape = data->get_shape();
auto& weights_shape = weights->get_shape();
// QuantizedDot does [n, ic] * [oc, ic] = [n, oc]
NODE_VALIDATION_CHECK(this,
data_shape.size() == 2 && weights_shape.size() == 2 &&
data_shape[1] == weights_shape[1],
......@@ -45,6 +46,6 @@ op::QuantizedDot::QuantizedDot(const shared_ptr<Node>& data,
" weights shape ",
weights_shape);
auto output_et = requantize ? (with_relu ? element::u8 : element::i8) : element::f32;
auto output_et = requantize ? (with_relu ? element::u8 : element::i8) : element::i32;
set_output_type(0, output_et, Shape{data_shape[0], weights_shape[0]});
}
......@@ -23,6 +23,7 @@
#include "gtest/gtest.h"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/builder/quantization/quantized_linear_dot.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/pass/constant_folding.hpp"
......@@ -1216,6 +1217,31 @@ TEST(builder, scaled_quantize_concat_unsigned_varying)
read_vector<uint8_t>(result));
}
TEST(builder, scaled_QDotInteger)
{
    // [1, 2] u8 input times [3, 2] i8 filter -> [1, 3] i32 result,
    // with no requantization applied by the builder.
    Shape input_shape{1, 2};
    Shape filter_shape{3, 2};
    Shape out_shape{1, 3};
    vector<uint8_t> input_data = {2, 3};
    vector<int8_t> filter_data = {0, 1, 2, 3, 4, 5};

    auto input_param = make_shared<op::Parameter>(element::u8, input_shape);
    auto filter_param = make_shared<op::Parameter>(element::i8, filter_shape);
    auto qdot = ngraph::builder::quantization::QuantizedDotInteger(input_param, filter_param);
    auto func =
        make_shared<Function>(NodeVector{qdot}, ParameterVector{input_param, filter_param});
    constant_fold(func);

    auto backend = runtime::Backend::create("CPU");

    // Allocate and fill backend tensors for the inputs and the i32 output.
    auto input_tensor = backend->create_tensor(element::u8, input_shape);
    copy_data(input_tensor, input_data);
    auto filter_tensor = backend->create_tensor(element::i8, filter_shape);
    copy_data(filter_tensor, filter_data);
    auto result_tensor = backend->create_tensor(element::i32, out_shape);

    auto exec = backend->compile(func);
    exec->call_with_validate({result_tensor}, {input_tensor, filter_tensor});
    // Row products: 2*0+3*1=3, 2*2+3*3=13, 2*4+3*5=23.
    EXPECT_EQ((vector<int32_t>{3, 13, 23}), read_vector<int32_t>(result_tensor));
}
// QuantizedDot
TEST(builder, dynamic_scaled_QD)
{
......@@ -1261,42 +1287,20 @@ TEST(builder, dynamic_scaled_QD)
// QuantizedDot (no requantize, no relu)
auto f_nrequantize = make_function(false, false);
auto f_nrequantize_r = backend->create_tensor(element::f32, shape_r);
auto f_nrequantize_r = backend->create_tensor(element::i32, shape_r);
auto f_nrequantize_handle = backend->compile(f_nrequantize);
f_nrequantize_handle->call_with_validate({f_nrequantize_r}, {a, b, d, e, e_a, g, h, i});
EXPECT_EQ((vector<float>{25.584705352783203,
33.88588333129883,
44.71411895751953,
70.78588104248047,
-1.3305882215499878,
105.76588439941406,
66.03529357910156,
37.86000061035156,
117.58235168457031,
63.0811767578125,
-2.6364705562591553,
124.02706146240234}),
read_vector<float>(f_nrequantize_r));
EXPECT_EQ((vector<int32_t>{26, 34, 45, 71, -1, 106, 66, 38, 118, 63, -3, 124}),
read_vector<int32_t>(f_nrequantize_r));
// QuantizedDot with relu
auto f_nrequantize_relu = make_function(false, true);
auto f_nrequantize_relu_r = backend->create_tensor(element::f32, shape_r);
auto f_nrequantize_relu_r = backend->create_tensor(element::i32, shape_r);
auto f_nrequantize_relu_handle = backend->compile(f_nrequantize_relu);
f_nrequantize_relu_handle->call_with_validate({f_nrequantize_relu_r},
{a, b, d, e, e_a, g, h, i});
EXPECT_EQ((vector<float>{25.584705352783203,
33.88588333129883,
44.71411895751953,
70.78588104248047,
-0.0,
105.76588439941406,
66.03529357910156,
37.86000061035156,
117.58235168457031,
63.0811767578125,
-0.0,
124.02706146240234}),
read_vector<float>(f_nrequantize_relu_r));
EXPECT_EQ((vector<int32_t>{26, 34, 45, 71, 0, 106, 66, 38, 118, 63, 0, 124}),
read_vector<int32_t>(f_nrequantize_relu_r));
// QuantizedDot with requantize and no relu
auto f_requantize = make_function(true, false);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment