graph builders for quantize scale (#1976)

* quantize scale passing unit tests
* epsilon bump
* finished with quantization scale
* unit tests passing with convolution scale as builder
* broadcasted constants and cleanup
* api consistency for quant builders
* code style
* cleanup
* newline at EOF
* use requantization_scale
* drop TF license as we are no longer using TF code directly

graph builders for quantize scale (#1976)
* quantize scale passing unit tests
* epsilon bump
* finished with quantization scale
* unit tests passing with convolution scale as builder
* broadcasted constants and cleanup
* api consistency for quant builders
* code style
* cleanup
* newline at EOF
* use requantization_scale
* drop TF license as we are no longer using TF code directly
8bd3846f · Adam Straw · Robert Kimball · 2a26558a · 8bd3846f · 8bd3846f
Commit 8bd3846f authored Nov 07, 2018 by Adam Straw Committed by Robert Kimball Nov 07, 2018
13 changed files
--- a/src/ngraph/builder/make_constant.hpp
+++ b/src/ngraph/builder/make_constant.hpp
+//*****************************************************************************
+// Copyright 2017-2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/node.hpp"
+#include "ngraph/op/broadcast.hpp"
+#include "ngraph/op/constant.hpp"
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Builds a graph node that yields the value \p num with the given element
+        ///        type and shape.
+        ///
+        /// A scalar (rank-0) op::Constant holding \p num, converted with static_cast to the
+        /// C++ type matching \p type, is always created first. If \p shape has one or more
+        /// dimensions, that scalar is then wrapped in an op::Broadcast to \p shape.
+        ///
+        /// \param type  Element type of the constant; one of f32, f64, i64, i32, i16, i8,
+        ///              u64, u32, u16 or u8.
+        /// \param shape Shape of the returned node; an empty shape returns the bare scalar.
+        /// \param num   Value to store; must be convertible to the selected C++ type via
+        ///              static_cast.
+        /// \return The scalar Constant node, or a Broadcast node fed by it.
+        /// \throws ngraph_error if \p type is not one of the element types listed above.
+        template <class T>
+        std::shared_ptr<Node>
+            make_constant(const element::Type& type, const Shape& shape, const T& num)
+        {
+            // The constant itself is always rank-0; any requested rank comes from Broadcast.
+            const ngraph::Shape scalar_shape{};
+            std::shared_ptr<Node> scalar = nullptr;
+
+            // Pick the C++ storage type that corresponds to the requested element type.
+            if (type == element::f32)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<float>{static_cast<float>(num)});
+            }
+            else if (type == element::f64)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<double>{static_cast<double>(num)});
+            }
+            else if (type == element::i64)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<int64_t>{static_cast<int64_t>(num)});
+            }
+            else if (type == element::i32)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<int32_t>{static_cast<int32_t>(num)});
+            }
+            else if (type == element::i16)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<int16_t>{static_cast<int16_t>(num)});
+            }
+            else if (type == element::i8)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<int8_t>{static_cast<int8_t>(num)});
+            }
+            else if (type == element::u64)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<uint64_t>{static_cast<uint64_t>(num)});
+            }
+            else if (type == element::u32)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<uint32_t>{static_cast<uint32_t>(num)});
+            }
+            else if (type == element::u16)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<uint16_t>{static_cast<uint16_t>(num)});
+            }
+            else if (type == element::u8)
+            {
+                scalar = std::make_shared<ngraph::op::Constant>(
+                    type, scalar_shape, std::vector<uint8_t>{static_cast<uint8_t>(num)});
+            }
+            else
+            {
+                throw ngraph_error("make_constant: Unsupported element type");
+            }
+
+            // A rank-0 request needs no broadcast: hand back the scalar as-is.
+            if (shape.size() == 0)
+            {
+                return scalar;
+            }
+
+            // The input is a scalar, so every axis of the output shape is a broadcast axis.
+            ngraph::AxisSet broadcast_axes;
+            for (size_t axis = 0; axis < shape.size(); ++axis)
+            {
+                broadcast_axes.insert(axis);
+            }
+
+            return std::make_shared<ngraph::op::Broadcast>(scalar, shape, broadcast_axes);
+        }
+    }
+}
--- a/src/ngraph/builder/quantization.cpp
+++ b/src/ngraph/builder/quantization.cpp
--- a/src/ngraph/builder/quantization.hpp
+++ b/src/ngraph/builder/quantization.hpp
@@ -43,68 +43,67 @@ namespace ngraph
                                               const ngraph::element::Type& type,
                                               const ngraph::AxisSet& axes);

-        std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
+        std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
                                                     const Shape& window_shape,
                                                     const Strides& window_movement_strides,
                                                     const Shape& padding_below,
                                                     const Shape& padding_above,
                                                     bool include_padding_in_avg_computation,
-                                                     const std::shared_ptr<Node> min,
-                                                     const std::shared_ptr<Node> max);
+                                                     std::shared_ptr<Node> min,
+                                                     std::shared_ptr<Node> max);

        std::shared_ptr<Node>
-            ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
-                                           const std::shared_ptr<Node>& filters,
-                                           const std::shared_ptr<Node>& bias,
+            ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
+                                           std::shared_ptr<Node> filters,
+                                           std::shared_ptr<Node> bias,
                                           const Strides& window_movement_strides,
                                           const Strides& window_dilation_strides,
                                           const CoordinateDiff& padding_below,
                                           const CoordinateDiff& padding_above,
                                           const Strides& data_dilation_strides,
-                                           const std::shared_ptr<Node> min_input,
-                                           const std::shared_ptr<Node> max_input,
-                                           const std::shared_ptr<Node> min_filter,
-                                           const std::shared_ptr<Node> max_filter,
-                                           const std::shared_ptr<Node> min_freezed_output,
-                                           const std::shared_ptr<Node> max_freezed_output,
+                                           std::shared_ptr<Node> min_input,
+                                           std::shared_ptr<Node> max_input,
+                                           std::shared_ptr<Node> min_filter,
+                                           std::shared_ptr<Node> max_filter,
+                                           std::shared_ptr<Node> min_freezed_output,
+                                           std::shared_ptr<Node> max_freezed_output,
                                           const bool with_relu = false);

        std::shared_ptr<Node>
-            ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
-                                           const std::shared_ptr<Node>& filters,
+            ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
+                                           std::shared_ptr<Node> filters,
                                           const Strides& window_movement_strides,
                                           const Strides& window_dilation_strides,
                                           const CoordinateDiff& padding_below,
                                           const CoordinateDiff& padding_above,
                                           const Strides& data_dilation_strides,
-                                           const std::shared_ptr<Node> min_input,
-                                           const std::shared_ptr<Node> max_input,
-                                           const std::shared_ptr<Node> min_filter,
-                                           const std::shared_ptr<Node> max_filter,
-                                           const std::shared_ptr<Node> min_freezed_output,
-                                           const std::shared_ptr<Node> max_freezed_output);
+                                           std::shared_ptr<Node> min_input,
+                                           std::shared_ptr<Node> max_input,
+                                           std::shared_ptr<Node> min_filter,
+                                           std::shared_ptr<Node> max_filter,
+                                           std::shared_ptr<Node> min_freezed_output,
+                                           std::shared_ptr<Node> max_freezed_output);

-        std::shared_ptr<Node>
-            ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
-                                       const std::shared_ptr<Node>& filters,
-                                       const Strides& window_movement_strides,
-                                       const Strides& window_dilation_strides,
-                                       const CoordinateDiff& padding_below,
-                                       const CoordinateDiff& padding_above,
-                                       const Strides& data_dilation_strides,
-                                       const std::shared_ptr<Node> min_input,
-                                       const std::shared_ptr<Node> max_input,
-                                       const std::shared_ptr<Node> min_filter,
-                                       const std::shared_ptr<Node> max_filter,
-                                       const std::shared_ptr<Node> min_freezed_output,
-                                       const std::shared_ptr<Node> max_freezed_output);
+        std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
+                                                         std::shared_ptr<Node> filters,
+                                                         const Strides& window_movement_strides,
+                                                         const Strides& window_dilation_strides,
+                                                         const CoordinateDiff& padding_below,
+                                                         const CoordinateDiff& padding_above,
+                                                         const Strides& data_dilation_strides,
+                                                         std::shared_ptr<Node> min_input,
+                                                         std::shared_ptr<Node> max_input,
+                                                         std::shared_ptr<Node> min_filter,
+                                                         std::shared_ptr<Node> max_filter,
+                                                         std::shared_ptr<Node> min_freezed_output,
+                                                         std::shared_ptr<Node> max_freezed_output);

-        std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
+        std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
                                                     const Shape& window_shape,
                                                     const Strides& window_movement_strides,
                                                     const Shape& padding_below,
                                                     const Shape& padding_above,
-                                                     const std::shared_ptr<Node> min,
-                                                     const std::shared_ptr<Node> max);
+                                                     std::shared_ptr<Node> min,
+                                                     std::shared_ptr<Node> max);
    }
 }
--- a/src/ngraph/builder/quantization_util.hpp
+++ b/src/ngraph/builder/quantization_util.hpp
--- a/src/ngraph/op/experimental/quantized_conv.cpp
+++ b/src/ngraph/op/experimental/quantized_conv.cpp
@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
    auto& data_batch_shape = data_batch->get_shape();
    auto& filters_shape = filters->get_shape();

-    auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
-    auto scale_val = scale_const_op->get_vector<float>();
-
-    this->m_scale = scale_val[0];
-
    set_output_type(0,
                    element::i8,
                    util::infer_convolution_output_shape(this,

--- a/src/ngraph/op/experimental/quantized_conv.hpp
+++ b/src/ngraph/op/experimental/quantized_conv.hpp
@@ -41,7 +41,6 @@ namespace ngraph
            const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
            std::shared_ptr<Node> get_filters() { return get_argument(1); }
            std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
-            float get_scale() const { return m_scale; }
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

@@ -51,7 +50,6 @@ namespace ngraph
            CoordinateDiff m_padding_below;
            CoordinateDiff m_padding_above;
            Strides m_data_dilation_strides;
-            float m_scale;
        };
    }
 }
--- a/src/ngraph/op/experimental/quantized_conv_bias.cpp
+++ b/src/ngraph/op/experimental/quantized_conv_bias.cpp
@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
    auto& data_batch_shape = data_batch->get_shape();
    auto& filters_shape = filters->get_shape();

-    auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
-    auto scale_val = scale_const_op->get_vector<float>();
-    this->m_scale = scale_val[0];
-
    // TODO: call ngraph util
    // util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());


--- a/src/ngraph/op/experimental/quantized_conv_bias.hpp
+++ b/src/ngraph/op/experimental/quantized_conv_bias.hpp
@@ -47,7 +47,6 @@ namespace ngraph
            const CoordinateDiff& get_padding_below() const { return m_padding_below; }
            const CoordinateDiff& get_padding_above() const { return m_padding_above; }
            const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
-            float get_scale() const { return m_scale; }
            std::shared_ptr<Node> get_bias() { return get_argument(2); }
            std::shared_ptr<Node> get_filters() { return get_argument(1); }
            std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
@@ -62,7 +61,6 @@ namespace ngraph
            CoordinateDiff m_padding_above;
            Strides m_data_dilation_strides;
            bool m_with_relu;
-            float m_scale;
        };
    }
 }
--- a/src/ngraph/op/experimental/quantized_conv_relu.cpp
+++ b/src/ngraph/op/experimental/quantized_conv_relu.cpp
@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod
    auto& data_batch_shape = data_batch->get_shape();
    auto& filters_shape = filters->get_shape();

-    auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
-    float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
-    this->m_scale = scale_val;
-
    set_output_type(0,
                    element::u8,
                    util::infer_convolution_output_shape(this,

--- a/src/ngraph/op/experimental/quantized_conv_relu.hpp
+++ b/src/ngraph/op/experimental/quantized_conv_relu.hpp
@@ -43,7 +43,6 @@ namespace ngraph
            const CoordinateDiff& get_padding_below() const { return m_padding_below; }
            const CoordinateDiff& get_padding_above() const { return m_padding_above; }
            const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
-            float get_scale() const { return m_scale; }
            std::shared_ptr<Node> get_filters() { return get_argument(1); }
            std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
            virtual std::shared_ptr<Node>
@@ -55,7 +54,6 @@ namespace ngraph
            CoordinateDiff m_padding_below;
            CoordinateDiff m_padding_above;
            Strides m_data_dilation_strides;
-            float m_scale;
        };
    }
 }
--- a/src/ngraph/pass/constant_folding.cpp
+++ b/src/ngraph/pass/constant_folding.cpp
@@ -30,6 +30,7 @@
 #include "ngraph/op/negative.hpp"
 #include "ngraph/op/pad.hpp"
 #include "ngraph/op/quantize.hpp"
+#include "ngraph/op/relu.hpp"
 #include "ngraph/op/reshape.hpp"
 #include "ngraph/op/subtract.hpp"
 #include "ngraph/pattern/matcher.hpp"
@@ -45,6 +46,7 @@
 #include "ngraph/runtime/reference/negate.hpp"
 #include "ngraph/runtime/reference/pad.hpp"
 #include "ngraph/runtime/reference/quantize.hpp"
+#include "ngraph/runtime/reference/relu.hpp"
 #include "ngraph/runtime/reference/reshape.hpp"
 #include "ngraph/runtime/reference/subtract.hpp"

@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary()

 bool is_supported_unary_op(std::shared_ptr<Node> n)
 {
-    return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n);
+    return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n) ||
+           std::dynamic_pointer_cast<op::Relu>(n);
 }

 template <class T>
@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant,
        runtime::reference::negate<T>(
            constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
    }
+    else if (std::dynamic_pointer_cast<op::Relu>(unary))
+    {
+        runtime::reference::relu<T>(
+            constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
+    }
    else
    {
        NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op";

--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -118,6 +118,7 @@
 #include "ngraph/op/topk.hpp"
 #include "ngraph/pass/algebraic_simplification.hpp"
 #include "ngraph/pass/common_function_collection.hpp"
+#include "ngraph/pass/constant_folding.hpp"
 #include "ngraph/pass/core_fusion.hpp"
 #include "ngraph/pass/cse.hpp"
 #include "ngraph/pass/dump_sorted.hpp"
@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
    NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
    pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
    pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
+    pass_manager.register_pass<ngraph::pass::ConstantFolding>();
    pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
    pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(
        runtime::cpu::get_cse_handlers_map());

--- a/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
+++ b/src/ngraph/runtime/cpu/mkldnn_emitter.hpp
@@ -25,6 +25,7 @@

 #include "ngraph/coordinate_diff.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/op/constant.hpp"
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/experimental/quantized_conv.hpp"
 #include "ngraph/op/experimental/quantized_conv_bias.hpp"
@@ -226,6 +227,16 @@ namespace ngraph
                    }
                    else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
                    {
+                        auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
+                        auto scale_const_op =
+                            std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
+                        if (scale_const_op == nullptr)
+                        {
+                            throw ngraph_error("QuantizedConvolution scale must be a Constant");
+                        }
+
+                        auto scale_val = scale_const_op->get_vector<float>();
+
                        return build_quantized_convolution(
                            data_desc,
                            weights_desc,
@@ -234,12 +245,21 @@ namespace ngraph
                            window_dilation_strides_adjusted,
                            convolution->get_padding_below(),
                            convolution->get_padding_above(),
-                            (dynamic_cast<const ngraph::op::QuantizedConvolution*>(node))
-                                ->get_scale(),
+                            scale_val[0],
                            ops);
                    }
                    else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
                    {
+                        auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
+                        auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
+                            qcr->get_arguments()[2]);
+                        if (scale_const_op == nullptr)
+                        {
+                            throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
+                        }
+
+                        auto scale_val = scale_const_op->get_vector<float>();
+
                        return build_quantized_convolution(
                            data_desc,
                            weights_desc,
@@ -248,12 +268,21 @@ namespace ngraph
                            window_dilation_strides_adjusted,
                            convolution->get_padding_below(),
                            convolution->get_padding_above(),
-                            (dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node))
-                                ->get_scale(),
+                            scale_val[0],
                            ops);
                    }
                    else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
                    {
+                        auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
+                        auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
+                            qcb->get_arguments()[3]);
+                        if (scale_const_op == nullptr)
+                        {
+                            throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
+                        }
+
+                        auto scale_val = scale_const_op->get_vector<float>();
+
                        // conv+bias = cvt_to_int8(scale*(dst + bias))
                        auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
                        return build_quantized_convolution(
@@ -265,8 +294,7 @@ namespace ngraph
                            window_dilation_strides_adjusted,
                            convolution->get_padding_below(),
                            convolution->get_padding_above(),
-                            (dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
-                                ->get_scale(),
+                            scale_val[0],
                            ops);
                    }
                    else