Segregate the quant builders op wise (#3501)

* Segregate builders op wise * Style * Update ngraph.hpp

Segregate the quant builders op wise (#3501)
* Segregate builders op wise * Style * Update ngraph.hpp
f3b9389c · Nishant Patel · Scott Cyphers · 98205845 · f3b9389c · f3b9389c
Commit f3b9389c authored Aug 23, 2019 by Nishant Patel Committed by Scott Cyphers Aug 23, 2019
19 changed files
--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -24,20 +24,23 @@ set (SRC
    axis_vector.hpp
    builder/autobroadcast.cpp
    builder/autobroadcast.hpp
+    builder/dequantize_builder.cpp
+    builder/dequantize_builder.hpp
    builder/make_constant.hpp
    builder/norm.cpp
    builder/norm.hpp
    builder/numpy_transpose.cpp
    builder/numpy_transpose.hpp
-    builder/quantization.cpp
-    builder/quantization.hpp
+    builder/quantize_builder.cpp
+    builder/quantize_builder.hpp
+    builder/quantized_concat_builder.cpp
+    builder/quantized_concat_builder.hpp
    builder/quantized_conv_builder.cpp
    builder/quantized_conv_builder.hpp
    builder/quantized_dot_builder.cpp
    builder/quantized_dot_builder.hpp
    builder/quantization/quantized_linear_convolution.cpp
    builder/quantization/quantized_linear_convolution.hpp
-    builder/quantization_util.hpp
    builder/quantization_utils.hpp
    builder/quantization_utils.cpp
    builder/reduce_ops.cpp

--- a/src/ngraph/builder/dequantize_builder.cpp
+++ b/src/ngraph/builder/dequantize_builder.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <memory>
+
+#include "ngraph/builder/dequantize_builder.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Builds an op::Dequantize node whose scale is derived from a min/max range.
+        ///
+        /// The scale is obtained from quantization_utils::get_scale(min, max, quant_type),
+        /// where quant_type is the element type of \p input. The zero point is a constant 0
+        /// of that same type, shaped like \p min.
+        ///
+        /// \param input     Tensor to dequantize; its element type is used as the quantized type.
+        /// \param min       Range minimum handed to get_scale; must have element type
+        ///                  \p real_type.
+        /// \param max       Range maximum handed to get_scale; must have element type
+        ///                  \p real_type and the same shape as \p min.
+        /// \param real_type Element type requested for the Dequantize output.
+        /// \param axes      Axis set forwarded unchanged to op::Dequantize.
+        /// \return The constructed op::Dequantize node.
+        /// \throws ngraph_error when min/max have the wrong element type or differ in shape.
+        shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
+                                           const Output<Node>& min,
+                                           const Output<Node>& max,
+                                           const ngraph::element::Type& real_type,
+                                           const ngraph::AxisSet& axes)
+        {
+            auto quant_type = input.get_element_type();
+
+            // min/max are validated against real_type, i.e. the type of the dequantized
+            // output, not against the (quantized) type of the input tensor.
+            if (min.get_element_type() != real_type)
+            {
+                throw ngraph_error("DequantizeBuilder: min must match output type");
+            }
+
+            if (max.get_element_type() != real_type)
+            {
+                throw ngraph_error("DequantizeBuilder: max must match output type");
+            }
+
+            auto shape = min.get_shape();
+            if (shape != max.get_shape())
+            {
+                throw ngraph_error("DequantizeBuilder: min and max must have same shape");
+            }
+
+            // Zero point is fixed at 0; it is created before the scale so that node
+            // construction order stays as it was.
+            auto zero = make_constant(quant_type, shape, 0);
+            auto scale = quantization_utils::get_scale(min, max, quant_type);
+            return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
+        }
+    }
+}
--- a/src/ngraph/builder/dequantize_builder.hpp
+++ b/src/ngraph/builder/dequantize_builder.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/builder/make_constant.hpp"
+#include "ngraph/coordinate_diff.hpp"
+#include "ngraph/node.hpp"
+#include "ngraph/op/dequantize.hpp"
+#include "quantization_utils.hpp"
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Builds an op::Dequantize node from a min/max range description.
+        ///
+        /// \param input     Tensor to dequantize; its element type is treated as the quantized
+        ///                  type when the scale is computed.
+        /// \param min       Range minimum; must have element type \p real_type.
+        /// \param max       Range maximum; must have element type \p real_type and the same
+        ///                  shape as \p min.
+        /// \param real_type Element type of the dequantized result.
+        /// \param axes      Axis set forwarded to op::Dequantize.
+        /// \return The constructed op::Dequantize node.
+        /// \throws ngraph_error if the min/max element types or shapes are inconsistent.
+        std::shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
+                                                const Output<Node>& min,
+                                                const Output<Node>& max,
+                                                const ngraph::element::Type& real_type,
+                                                const ngraph::AxisSet& axes);
+    }
+}
--- a/src/ngraph/builder/quantization.cpp
+++ b/src/ngraph/builder/quantization.cpp
--- a/src/ngraph/builder/quantization.hpp
+++ b/src/ngraph/builder/quantization.hpp
-//*****************************************************************************
-// Copyright 2017-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//*****************************************************************************
-
-#pragma once
-
-#include "ngraph/coordinate_diff.hpp"
-#include "ngraph/node.hpp"
-#include "ngraph/op/dequantize.hpp"
-#include "ngraph/op/experimental/quantized_conv_bias.hpp"
-#include "ngraph/op/experimental/quantized_conv_relu.hpp"
-#include "ngraph/op/experimental/quantized_dot_bias.hpp"
-#include "ngraph/op/quantize.hpp"
-
-namespace ngraph
-{
-    namespace builder
-    {
-        std::shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
-                                             const Output<Node>& min,
-                                             const Output<Node>& max,
-                                             const ngraph::element::Type& type,
-                                             const ngraph::AxisSet& axes,
-                                             op::Quantize::RoundMode round_mode);
-
-        std::shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
-                                               const Output<Node>& min,
-                                               const Output<Node>& max,
-                                               const ngraph::element::Type& type,
-                                               const ngraph::AxisSet& axes);
-
-        std::shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
-                                                    size_t concatenation_axis,
-                                                    const NodeVector& mins,
-                                                    const NodeVector& maxes);
-
-        std::shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
-                                                             const Output<Node>& filters,
-                                                             const Output<Node>& bias,
-                                                             const Strides& window_movement_strides,
-                                                             const Strides& window_dilation_strides,
-                                                             const CoordinateDiff& padding_below,
-                                                             const CoordinateDiff& padding_above,
-                                                             const Strides& data_dilation_strides,
-                                                             const Output<Node>& min_input,
-                                                             const Output<Node>& max_input,
-                                                             const Output<Node>& min_filter,
-                                                             const Output<Node>& max_filter,
-                                                             const Output<Node>& min_output,
-                                                             const Output<Node>& max_output,
-                                                             const bool with_relu = false);
-
-        std::shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
-                                                             const Output<Node>& filters,
-                                                             const Strides& window_movement_strides,
-                                                             const Strides& window_dilation_strides,
-                                                             const CoordinateDiff& padding_below,
-                                                             const CoordinateDiff& padding_above,
-                                                             const Strides& data_dilation_strides,
-                                                             const Output<Node>& min_input,
-                                                             const Output<Node>& max_input,
-                                                             const Output<Node>& min_filter,
-                                                             const Output<Node>& max_filter,
-                                                             const Output<Node>& min_output,
-                                                             const Output<Node>& max_output);
-
-        std::shared_ptr<Node>
-            ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
-                                              const Output<Node>& filters,
-                                              const Output<Node>& bias,
-                                              const Output<Node>& sum_input,
-                                              const Strides& window_movement_strides,
-                                              const Strides& window_dilation_strides,
-                                              const CoordinateDiff& padding_below,
-                                              const CoordinateDiff& padding_above,
-                                              const Strides& data_dilation_strides,
-                                              const Output<Node>& min_input,
-                                              const Output<Node>& max_input,
-                                              const Output<Node>& min_filter,
-                                              const Output<Node>& max_filter,
-                                              const Output<Node>& min_output,
-                                              const Output<Node>& max_output,
-                                              const Output<Node>& min_sum_input,
-                                              const Output<Node>& max_sum_input,
-                                              const bool with_relu = false);
-
-        std::shared_ptr<Node>
-            ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
-                                                    const Output<Node>& filters,
-                                                    const Output<Node>& bias,
-                                                    const Output<Node>& sum_input,
-                                                    const Strides& window_movement_strides,
-                                                    const Strides& window_dilation_strides,
-                                                    const CoordinateDiff& padding_below,
-                                                    const CoordinateDiff& padding_above,
-                                                    const Strides& data_dilation_strides,
-                                                    const Output<Node>& min_input,
-                                                    const Output<Node>& max_input,
-                                                    const Output<Node>& min_filter,
-                                                    const Output<Node>& max_filter,
-                                                    const Output<Node>& min_output,
-                                                    const Output<Node>& max_output,
-                                                    const Output<Node>& min_sum_input,
-                                                    const Output<Node>& max_sum_input,
-                                                    const bool with_relu = false);
-
-        std::shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
-                                                     const Output<Node>& filters,
-                                                     const Output<Node>& bias,
-                                                     const Output<Node>& min_input,
-                                                     const Output<Node>& max_input,
-                                                     const Output<Node>& min_filter,
-                                                     const Output<Node>& max_filter,
-                                                     const Output<Node>& min_output,
-                                                     const Output<Node>& max_output,
-                                                     const bool requantize = true,
-                                                     const bool with_relu = false);
-
-    } // namespace builder
-} // namespace ngraph
--- a/src/ngraph/builder/quantization/quantized_linear_convolution.cpp
+++ b/src/ngraph/builder/quantization/quantized_linear_convolution.cpp
@@ -17,7 +17,6 @@
 #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
 #include "ngraph/axis_set.hpp"
 #include "ngraph/builder/make_constant.hpp"
-#include "ngraph/builder/quantization.hpp"
 #include "ngraph/op/constant.hpp"
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/dequantize.hpp"

--- a/src/ngraph/builder/quantization_util.hpp
+++ b/src/ngraph/builder/quantization_util.hpp
--- a/src/ngraph/builder/quantization_utils.cpp
+++ b/src/ngraph/builder/quantization_utils.cpp
@@ -74,6 +74,128 @@ namespace ngraph

                return max_abs_range / target_range;
            }
+
+            // Builds the bias scale sub-graph:
+            //     max_abs(min_input, max_input) * max_abs(min_filter, max_filter)
+            //         / (uint8_t max * int8_t max)
+            // All four range nodes must agree on element type and on shape, otherwise an
+            // ngraph_error is raised.
+            std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
+                                                 Output<Node> max_input,
+                                                 Output<Node> min_filter,
+                                                 Output<Node> max_filter)
+            {
+                const auto element_type = min_input.get_element_type();
+                const bool type_mismatch = element_type != max_input.get_element_type() ||
+                                           element_type != min_filter.get_element_type() ||
+                                           element_type != max_filter.get_element_type();
+                if (type_mismatch)
+                {
+                    throw ngraph_error("get_bias_scale: min and max must have same type");
+                }
+
+                const auto range_shape = min_input.get_shape();
+                const bool shape_mismatch = range_shape != max_input.get_shape() ||
+                                            range_shape != min_filter.get_shape() ||
+                                            range_shape != max_filter.get_shape();
+                if (shape_mismatch)
+                {
+                    throw ngraph_error("get_bias_scale: min and max must have same shape");
+                }
+
+                auto input_abs_range = max_abs(min_input, max_input);
+                auto filter_abs_range = max_abs(min_filter, max_filter);
+
+                // Product of the largest uint8_t and int8_t values, computed as int.
+                const int quantized_range =
+                    std::numeric_limits<uint8_t>::max() * std::numeric_limits<int8_t>::max();
+                auto range = make_constant(element_type, range_shape, quantized_range);
+
+                // The scale is deliberately inverted here because the Quantize op receives
+                // its scale as 1/scale.
+                return (input_abs_range * filter_abs_range) / range;
+            }
+
+            // Builds the sum scale sub-graph: max_abs of the second conv's frozen output
+            // range divided by max_abs of the first conv's frozen output range.
+            // Throws ngraph_error unless all four range nodes share element type and shape.
+            std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
+                                                Output<Node> max_freezed_output_conv_1,
+                                                Output<Node> min_freezed_output_conv_2,
+                                                Output<Node> max_freezed_output_conv_2)
+            {
+                const auto element_type = min_freezed_output_conv_1.get_element_type();
+                const bool type_mismatch =
+                    element_type != max_freezed_output_conv_1.get_element_type() ||
+                    element_type != min_freezed_output_conv_2.get_element_type() ||
+                    element_type != max_freezed_output_conv_2.get_element_type();
+                if (type_mismatch)
+                {
+                    throw ngraph_error("get_sum_scale: min and max must have same type");
+                }
+
+                const auto range_shape = min_freezed_output_conv_1.get_shape();
+                const bool shape_mismatch =
+                    range_shape != max_freezed_output_conv_1.get_shape() ||
+                    range_shape != min_freezed_output_conv_2.get_shape() ||
+                    range_shape != max_freezed_output_conv_2.get_shape();
+                if (shape_mismatch)
+                {
+                    throw ngraph_error("get_sum_scale: min and max must have same shape");
+                }
+
+                auto abs_range_conv_1 =
+                    max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
+                auto abs_range_conv_2 =
+                    max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
+                return abs_range_conv_2 / abs_range_conv_1;
+            }
+
+            // Builds the scale sub-graph for a quantized dot product.
+            //   requantize == true : data_scale * weight_scale / out_scale
+            //   requantize == false: data_scale * weight_scale
+            // data_scale uses input_type, weight_scale always uses element::i8, and out_scale
+            // uses output_type. out_scale is built in both cases, even though it is only used
+            // when requantizing. Throws ngraph_error unless all six range nodes share element
+            // type and shape.
+            std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
+                                                Output<Node> max_input,
+                                                Output<Node> min_filter,
+                                                Output<Node> max_filter,
+                                                Output<Node> min_freezed_output,
+                                                Output<Node> max_freezed_output,
+                                                const ngraph::element::Type& input_type,
+                                                const ngraph::element::Type& output_type,
+                                                const bool requantize)
+            {
+                const auto element_type = min_input.get_element_type();
+                const bool type_mismatch =
+                    element_type != max_input.get_element_type() ||
+                    element_type != min_filter.get_element_type() ||
+                    element_type != max_filter.get_element_type() ||
+                    element_type != min_freezed_output.get_element_type() ||
+                    element_type != max_freezed_output.get_element_type();
+                if (type_mismatch)
+                {
+                    throw ngraph_error("get_dot_scale: min and max must have same type");
+                }
+
+                const auto range_shape = min_input.get_shape();
+                const bool shape_mismatch = range_shape != max_input.get_shape() ||
+                                            range_shape != min_filter.get_shape() ||
+                                            range_shape != max_filter.get_shape() ||
+                                            range_shape != min_freezed_output.get_shape() ||
+                                            range_shape != max_freezed_output.get_shape();
+                if (shape_mismatch)
+                {
+                    throw ngraph_error("get_dot_scale: min and max must have same shape");
+                }
+
+                auto data_scale = get_scale(min_input, max_input, input_type);
+                auto weight_scale = get_scale(min_filter, max_filter, element::i8);
+                auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);
+
+                auto combined_scale = data_scale * weight_scale;
+                if (!requantize)
+                {
+                    return combined_scale;
+                }
+                return combined_scale / out_scale;
+            }
+
+            // Validates the arguments of a quantized concat:
+            //   * args, mins and maxs must have the same number of entries;
+            //   * mins[i] and maxs[i] must have the same element type;
+            //   * mins[i] and maxs[i] must both have shape Shape{1}.
+            // Any violation raises ngraph_error. The contents of args are not inspected.
+            void
+                check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
+            {
+                const size_t count = args.size();
+                if (count != mins.size() || count != maxs.size())
+                {
+                    throw ngraph_error("Min and Max node vectors must be of same length");
+                }
+
+                const Shape expected_shape{1};
+                for (size_t idx = 0; idx < count; ++idx)
+                {
+                    const auto& range_min = mins[idx];
+                    const auto& range_max = maxs[idx];
+
+                    if (range_min->get_element_type() != range_max->get_element_type())
+                    {
+                        throw ngraph_error("check_concat: min and max must have same type");
+                    }
+
+                    if (range_min->get_shape() != expected_shape ||
+                        range_max->get_shape() != expected_shape)
+                    {
+                        throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
+                                           vector_to_string(range_min->get_shape()) +
+                                           vector_to_string(range_max->get_shape()));
+                    }
+                }
+            }
        }
    }
 }
--- a/src/ngraph/builder/quantization_utils.hpp
+++ b/src/ngraph/builder/quantization_utils.hpp
@@ -43,6 +43,30 @@ namespace ngraph
                                            const Output<Node>& input_max_range,
                                            const ngraph::element::Type& quant_type,
                                            bool bump_by_eps = false);
+
+            // Returns a node computing
+            //     max_abs(min_input, max_input) * max_abs(min_filter, max_filter)
+            //         / (uint8_t max * int8_t max).
+            // Throws ngraph_error unless all four arguments share element type and shape.
+            std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
+                                                 Output<Node> max_input,
+                                                 Output<Node> min_filter,
+                                                 Output<Node> max_filter);
+
+            // Returns a node computing
+            //     max_abs(min/max of conv_2) / max_abs(min/max of conv_1).
+            // Throws ngraph_error unless all four arguments share element type and shape.
+            std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
+                                                Output<Node> max_freezed_output_conv_1,
+                                                Output<Node> min_freezed_output_conv_2,
+                                                Output<Node> max_freezed_output_conv_2);
+
+            // Returns the scale node for a quantized dot product. With data_scale, weight_scale
+            // and out_scale obtained from get_scale (using input_type, element::i8 and
+            // output_type respectively), the result is
+            //     data_scale * weight_scale / out_scale   when requantize is true (default),
+            //     data_scale * weight_scale               otherwise.
+            // Throws ngraph_error unless all six range arguments share element type and shape.
+            std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
+                                                Output<Node> max_input,
+                                                Output<Node> min_filter,
+                                                Output<Node> max_filter,
+                                                Output<Node> min_freezed_output,
+                                                Output<Node> max_freezed_output,
+                                                const ngraph::element::Type& input_type,
+                                                const ngraph::element::Type& output_type,
+                                                const bool requantize = true);
+
+            // Validates quantized-concat arguments and throws ngraph_error on failure:
+            // the three vectors must have equal length, and each mins[i]/maxs[i] pair must
+            // share an element type and both have shape Shape{1}.
+            void check_concat(const NodeVector& args,
+                              const NodeVector& mins,
+                              const NodeVector& maxs);
        }
    }
 }
--- a/src/ngraph/builder/quantize_builder.cpp
+++ b/src/ngraph/builder/quantize_builder.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <memory>
+
+#include "ngraph/builder/quantize_builder.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Builds an op::Quantize node whose scale is derived from a min/max range.
+        ///
+        /// The scale is quantization_utils::get_scale(min, max, quant_type, true) and the
+        /// zero point is a constant 0 of type \p quant_type shaped like \p min.
+        ///
+        /// \param input      Tensor to quantize; min and max must share its element type.
+        /// \param min        Range minimum handed to get_scale.
+        /// \param max        Range maximum handed to get_scale; same shape as \p min.
+        /// \param quant_type Element type of the quantized output.
+        /// \param axes       Axis set forwarded unchanged to op::Quantize.
+        /// \param round_mode Rounding mode forwarded unchanged to op::Quantize.
+        /// \return The constructed op::Quantize node.
+        /// \throws ngraph_error when min/max have the wrong element type or differ in shape.
+        shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
+                                         const Output<Node>& min,
+                                         const Output<Node>& max,
+                                         const ngraph::element::Type& quant_type,
+                                         const ngraph::AxisSet& axes,
+                                         op::Quantize::RoundMode round_mode)
+        {
+            // Small local helper so each validation reads as a single statement.
+            const auto fail_if = [](bool violated, const char* message) {
+                if (violated)
+                {
+                    throw ngraph_error(message);
+                }
+            };
+
+            const auto input_type = input.get_element_type();
+            fail_if(min.get_element_type() != input_type,
+                    "QuantizeBuilder: min must match input type");
+            fail_if(max.get_element_type() != input_type,
+                    "QuantizeBuilder: max must match input type");
+
+            const auto range_shape = min.get_shape();
+            fail_if(range_shape != max.get_shape(),
+                    "QuantizeBuilder: min and max must have same shape");
+
+            // Zero point first, then scale: keeps node creation order unchanged.
+            auto zero_point = make_constant(quant_type, range_shape, 0);
+            auto scale = quantization_utils::get_scale(min, max, quant_type, true);
+            return make_shared<op::Quantize>(
+                input, scale, zero_point, quant_type, axes, round_mode);
+        }
+    }
+}
--- a/src/ngraph/builder/quantize_builder.hpp
+++ b/src/ngraph/builder/quantize_builder.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/builder/make_constant.hpp"
+#include "ngraph/coordinate_diff.hpp"
+#include "ngraph/node.hpp"
+#include "ngraph/op/quantize.hpp"
+#include "quantization_utils.hpp"
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Builds an op::Quantize node from a min/max range description.
+        ///
+        /// \param input      Tensor to quantize; min and max must share its element type.
+        /// \param min        Range minimum used to derive the scale.
+        /// \param max        Range maximum used to derive the scale; same shape as \p min.
+        /// \param quant_type Element type of the quantized result.
+        /// \param axes       Axis set forwarded to op::Quantize.
+        /// \param round_mode Rounding mode forwarded to op::Quantize.
+        /// \return The constructed op::Quantize node.
+        /// \throws ngraph_error if the min/max element types or shapes are inconsistent.
+        std::shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
+                                              const Output<Node>& min,
+                                              const Output<Node>& max,
+                                              const ngraph::element::Type& quant_type,
+                                              const ngraph::AxisSet& axes,
+                                              op::Quantize::RoundMode round_mode);
+    }
+}
--- a/src/ngraph/builder/quantized_concat_builder.cpp
+++ b/src/ngraph/builder/quantized_concat_builder.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <memory>
+
+#include "ngraph/builder/quantized_concat_builder.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Concatenates quantized tensors after bringing them to one common scale.
+        ///
+        /// A common output scale is computed with get_scale from the minimum over all
+        /// \p mins and the maximum over all \p maxs, using the element type of args[0].
+        /// Each argument is dequantized to f32 with its own scale, quantized again with the
+        /// common scale (ROUND_NEAREST_TOWARD_EVEN) into its original element type, and the
+        /// results are concatenated along \p concatenation_axis.
+        ///
+        /// \param args               Quantized inputs; must not be empty.
+        /// \param concatenation_axis Axis passed to the final op::Concat.
+        /// \param mins               One range-minimum node per input.
+        /// \param maxs               One range-maximum node per input.
+        /// \return The op::Concat node over the re-quantized inputs.
+        /// \throws ngraph_error if \p args is empty or quantization_utils::check_concat
+        ///         rejects the arguments.
+        shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
+                                                size_t concatenation_axis,
+                                                const NodeVector& mins,
+                                                const NodeVector& maxs)
+        {
+            quantization_utils::check_concat(args, mins, maxs);
+
+            // check_concat accepts three empty vectors (their lengths match), which would
+            // make the args[0] access below undefined behaviour. Reject that case explicitly.
+            if (args.empty())
+            {
+                throw ngraph_error("QuantizedConcatBuilder: args must not be empty");
+            }
+            auto quant_type = args[0]->get_element_type();
+
+            // output scale
+            auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
+            auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
+            auto out_scale = quantization_utils::get_scale(min, max, quant_type);
+
+            NodeVector rescaled_args(args.size());
+            for (size_t i = 0; i < args.size(); ++i)
+            {
+                auto q_type = args[i]->get_element_type();
+                // Per-input scale, reshaped from Shape{1} to a scalar.
+                auto in_scale = make_shared<ngraph::op::Reshape>(
+                    quantization_utils::get_scale(mins[i], maxs[i], q_type),
+                    AxisVector{0},
+                    Shape{});
+                auto zero = make_constant(q_type, in_scale->get_shape(), 0);
+
+                // Dequantize with the input's own scale, then quantize with the shared one.
+                rescaled_args[i] =
+                    make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
+                rescaled_args[i] =
+                    make_shared<op::Quantize>(rescaled_args[i],
+                                              out_scale,
+                                              zero,
+                                              q_type,
+                                              AxisSet{},
+                                              op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
+            }
+
+            return make_shared<op::Concat>(rescaled_args, concatenation_axis);
+        }
+    }
+}
--- a/src/ngraph/builder/quantized_concat_builder.hpp
+++ b/src/ngraph/builder/quantized_concat_builder.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/builder/make_constant.hpp"
+#include "ngraph/coordinate_diff.hpp"
+#include "ngraph/node.hpp"
+#include "ngraph/op/concat.hpp"
+#include "ngraph/op/dequantize.hpp"
+#include "ngraph/op/max.hpp"
+#include "ngraph/op/min.hpp"
+#include "ngraph/op/quantize.hpp"
+#include "ngraph/op/reshape.hpp"
+#include "quantization_utils.hpp"
+
+namespace ngraph
+{
+    namespace builder
+    {
+        /// \brief Concatenates quantized tensors after re-quantizing each to a common scale.
+        ///
+        /// \param args               Quantized inputs to concatenate.
+        /// \param concatenation_axis Axis passed to the resulting op::Concat.
+        /// \param mins               One range-minimum node per input; each must have shape
+        ///                           Shape{1}.
+        /// \param maxs               One range-maximum node per input; each must have shape
+        ///                           Shape{1} and the element type of the matching minimum.
+        /// \return The op::Concat node over the re-quantized inputs.
+        /// \throws ngraph_error if the vectors differ in length or a min/max pair is invalid.
+        std::shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
+                                                     size_t concatenation_axis,
+                                                     const NodeVector& mins,
+                                                     const NodeVector& maxs);
+    }
+}
--- a/src/ngraph/builder/quantized_conv_builder.cpp
+++ b/src/ngraph/builder/quantized_conv_builder.cpp
--- a/src/ngraph/builder/quantized_conv_builder.hpp
+++ b/src/ngraph/builder/quantized_conv_builder.hpp
@@ -18,6 +18,11 @@

 #include "ngraph/coordinate_diff.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/convert.hpp"
+#include "ngraph/op/experimental/quantized_conv_bias.hpp"
+#include "ngraph/op/experimental/quantized_conv_relu.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/quantized_convolution.hpp"
 #include "quantization_utils.hpp"

@@ -43,5 +48,77 @@ namespace ngraph
                                        const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
                                        const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
                                        const ngraph::AxisSet& output_axes = ngraph::AxisSet{});
+
+        /// \brief Declares the builder for a quantized convolution that takes a bias input.
+        ///
+        /// NOTE(review): only the declaration is visible in this header, so the parameter
+        /// notes below are read off the names and types; confirm them against
+        /// quantized_conv_builder.cpp.
+        ///
+        /// \param input Data input.
+        /// \param filters Filter input.
+        /// \param bias Bias input.
+        /// \param window_movement_strides Window movement strides.
+        /// \param window_dilation_strides Window dilation strides.
+        /// \param padding_below Padding below.
+        /// \param padding_above Padding above.
+        /// \param data_dilation_strides Data dilation strides.
+        /// \param min_input,max_input Presumably the value range of input -- TODO confirm.
+        /// \param min_filter,max_filter Presumably the value range of filters -- TODO confirm.
+        /// \param min_output,max_output Presumably the value range of the result -- TODO
+        ///        confirm.
+        /// \param with_relu Defaults to false; presumably requests a fused ReLU -- TODO confirm.
+        /// \return Shared pointer to the node created by the builder.
+        std::shared_ptr<Node>
+            QuantizedConvolutionBiasBuilder(const Output<Node>& input,
+                                            const Output<Node>& filters,
+                                            const Output<Node>& bias,
+                                            const Strides& window_movement_strides,
+                                            const Strides& window_dilation_strides,
+                                            const CoordinateDiff& padding_below,
+                                            const CoordinateDiff& padding_above,
+                                            const Strides& data_dilation_strides,
+                                            const Output<Node>& min_input,
+                                            const Output<Node>& max_input,
+                                            const Output<Node>& min_filter,
+                                            const Output<Node>& max_filter,
+                                            const Output<Node>& min_output,
+                                            const Output<Node>& max_output,
+                                            const bool with_relu = false);
+
+        /// \brief Declares the builder for a quantized convolution with ReLU.
+        ///
+        /// NOTE(review): only the declaration is visible in this header, so the parameter
+        /// notes below are read off the names and types; confirm them against
+        /// quantized_conv_builder.cpp.
+        ///
+        /// \param input Data input.
+        /// \param filters Filter input.
+        /// \param window_movement_strides Window movement strides.
+        /// \param window_dilation_strides Window dilation strides.
+        /// \param padding_below Padding below.
+        /// \param padding_above Padding above.
+        /// \param data_dilation_strides Data dilation strides.
+        /// \param min_input,max_input Presumably the value range of input -- TODO confirm.
+        /// \param min_filter,max_filter Presumably the value range of filters -- TODO confirm.
+        /// \param min_output,max_output Presumably the value range of the result -- TODO
+        ///        confirm.
+        /// \return Shared pointer to the node created by the builder.
+        std::shared_ptr<Node>
+            QuantizedConvolutionReluBuilder(const Output<Node>& input,
+                                            const Output<Node>& filters,
+                                            const Strides& window_movement_strides,
+                                            const Strides& window_dilation_strides,
+                                            const CoordinateDiff& padding_below,
+                                            const CoordinateDiff& padding_above,
+                                            const Strides& data_dilation_strides,
+                                            const Output<Node>& min_input,
+                                            const Output<Node>& max_input,
+                                            const Output<Node>& min_filter,
+                                            const Output<Node>& max_filter,
+                                            const Output<Node>& min_output,
+                                            const Output<Node>& max_output);
+
+        /// \brief Declares the builder for a quantized convolution with bias and a sum input.
+        ///
+        /// NOTE(review): only the declaration is visible in this header, so the parameter
+        /// notes below are read off the names and types; confirm them against
+        /// quantized_conv_builder.cpp.
+        ///
+        /// \param input Data input.
+        /// \param filters Filter input.
+        /// \param bias Bias input.
+        /// \param sum_input Additional input; presumably added to the convolution result --
+        ///        TODO confirm.
+        /// \param window_movement_strides Window movement strides.
+        /// \param window_dilation_strides Window dilation strides.
+        /// \param padding_below Padding below.
+        /// \param padding_above Padding above.
+        /// \param data_dilation_strides Data dilation strides.
+        /// \param min_input,max_input Presumably the value range of input -- TODO confirm.
+        /// \param min_filter,max_filter Presumably the value range of filters -- TODO confirm.
+        /// \param min_output,max_output Presumably the value range of the result -- TODO
+        ///        confirm.
+        /// \param min_sum_input,max_sum_input Presumably the value range of sum_input -- TODO
+        ///        confirm.
+        /// \param with_relu Defaults to false; presumably requests a fused ReLU -- TODO confirm.
+        /// \return Shared pointer to the node created by the builder.
+        std::shared_ptr<Node>
+            QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
+                                               const Output<Node>& filters,
+                                               const Output<Node>& bias,
+                                               const Output<Node>& sum_input,
+                                               const Strides& window_movement_strides,
+                                               const Strides& window_dilation_strides,
+                                               const CoordinateDiff& padding_below,
+                                               const CoordinateDiff& padding_above,
+                                               const Strides& data_dilation_strides,
+                                               const Output<Node>& min_input,
+                                               const Output<Node>& max_input,
+                                               const Output<Node>& min_filter,
+                                               const Output<Node>& max_filter,
+                                               const Output<Node>& min_output,
+                                               const Output<Node>& max_output,
+                                               const Output<Node>& min_sum_input,
+                                               const Output<Node>& max_sum_input,
+                                               const bool with_relu = false);
+
+        /// \brief Declares the "signed add" variant of the quantized convolution builder with
+        ///        bias and a sum input.
+        ///
+        /// NOTE(review): only the declaration is visible in this header, so the parameter
+        /// notes below are read off the names and types, and how this variant differs from
+        /// the unsigned one cannot be told from here; confirm against
+        /// quantized_conv_builder.cpp.
+        ///
+        /// \param input Data input.
+        /// \param filters Filter input.
+        /// \param bias Bias input.
+        /// \param sum_input Additional input; presumably added to the convolution result --
+        ///        TODO confirm.
+        /// \param window_movement_strides Window movement strides.
+        /// \param window_dilation_strides Window dilation strides.
+        /// \param padding_below Padding below.
+        /// \param padding_above Padding above.
+        /// \param data_dilation_strides Data dilation strides.
+        /// \param min_input,max_input Presumably the value range of input -- TODO confirm.
+        /// \param min_filter,max_filter Presumably the value range of filters -- TODO confirm.
+        /// \param min_output,max_output Presumably the value range of the result -- TODO
+        ///        confirm.
+        /// \param min_sum_input,max_sum_input Presumably the value range of sum_input -- TODO
+        ///        confirm.
+        /// \param with_relu Defaults to false; presumably requests a fused ReLU -- TODO confirm.
+        /// \return Shared pointer to the node created by the builder.
+        std::shared_ptr<Node>
+            QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
+                                                     const Output<Node>& filters,
+                                                     const Output<Node>& bias,
+                                                     const Output<Node>& sum_input,
+                                                     const Strides& window_movement_strides,
+                                                     const Strides& window_dilation_strides,
+                                                     const CoordinateDiff& padding_below,
+                                                     const CoordinateDiff& padding_above,
+                                                     const Strides& data_dilation_strides,
+                                                     const Output<Node>& min_input,
+                                                     const Output<Node>& max_input,
+                                                     const Output<Node>& min_filter,
+                                                     const Output<Node>& max_filter,
+                                                     const Output<Node>& min_output,
+                                                     const Output<Node>& max_output,
+                                                     const Output<Node>& min_sum_input,
+                                                     const Output<Node>& max_sum_input,
+                                                     const bool with_relu = false);
    }
 }
--- a/src/ngraph/builder/quantized_dot_builder.cpp
+++ b/src/ngraph/builder/quantized_dot_builder.cpp
@@ -17,7 +17,6 @@
 #include <memory>

 #include "ngraph/builder/quantized_dot_builder.hpp"
-#include "ngraph/op/constant.hpp"

 using namespace std;
 using namespace ngraph;
@@ -65,5 +64,45 @@ namespace ngraph
                                                 input1_axes,
                                                 output_axes);
        }
+
+        // Builds an op::QuantizedDotBias node from the inputs and their min/max range nodes.
+        //
+        // The requantization scale comes from quantization_utils::get_dot_scale, called with
+        // u8 as the output type when with_relu is set and i8 otherwise. A bias whose element
+        // type is not i32 is first wrapped in an op::Quantize producing i32, using the scale
+        // from quantization_utils::get_bias_scale, a zero constant shaped like min_input, an
+        // empty axis set and ROUND_NEAREST_TOWARD_EVEN rounding.
+        shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
+                                                 const Output<Node>& filters,
+                                                 const Output<Node>& bias,
+                                                 const Output<Node>& min_input,
+                                                 const Output<Node>& max_input,
+                                                 const Output<Node>& min_filter,
+                                                 const Output<Node>& max_filter,
+                                                 const Output<Node>& min_output,
+                                                 const Output<Node>& max_output,
+                                                 const bool requantize,
+                                                 const bool with_relu)
+        {
+            const auto& output_type = with_relu ? element::u8 : element::i8;
+            auto scale = quantization_utils::get_dot_scale(min_input,
+                                                           max_input,
+                                                           min_filter,
+                                                           max_filter,
+                                                           min_output,
+                                                           max_output,
+                                                           input.get_element_type(),
+                                                           output_type,
+                                                           requantize);
+
+            // Start from the caller's bias; it is only replaced when it is not i32 yet.
+            auto bias_i32 = bias;
+            if (bias.get_element_type() != element::i32)
+            {
+                auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
+                auto scale_for_bias = quantization_utils::get_bias_scale(
+                    min_input, max_input, min_filter, max_filter);
+
+                bias_i32 = make_shared<op::Quantize>(
+                    bias,
+                    scale_for_bias,
+                    zero_point,
+                    element::i32,
+                    AxisSet{},
+                    op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
+            }
+
+            return make_shared<op::QuantizedDotBias>(
+                input, filters, bias_i32, scale, requantize, with_relu);
+        }
    }
 }
--- a/src/ngraph/builder/quantized_dot_builder.hpp
+++ b/src/ngraph/builder/quantized_dot_builder.hpp
@@ -18,6 +18,9 @@

 #include "ngraph/coordinate_diff.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/experimental/quantized_dot_bias.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/quantized_dot.hpp"
 #include "quantization_utils.hpp"

@@ -38,5 +41,17 @@ namespace ngraph
                                                  const ngraph::AxisSet& input0_axes,
                                                  const ngraph::AxisSet& input1_axes,
                                                  const ngraph::AxisSet& output_axes);
+
+        /// \brief Builds an op::QuantizedDotBias node from the inputs and their range nodes.
+        ///
+        /// The definition in quantized_dot_builder.cpp derives the requantization scale via
+        /// quantization_utils::get_dot_scale and, when bias is not already element::i32,
+        /// quantizes it to i32 with the scale from quantization_utils::get_bias_scale.
+        ///
+        /// \param input First input of the dot; its element type is passed to get_dot_scale.
+        /// \param filters Second input of the dot.
+        /// \param bias Bias input; used as is when its element type is i32, otherwise
+        ///        quantized to i32 first.
+        /// \param min_input,max_input Range nodes for input, used for the dot scale and the
+        ///        bias scale; min_input's shape is also used for the bias zero point.
+        /// \param min_filter,max_filter Range nodes for filters, used for both scales.
+        /// \param min_output,max_output Range nodes for the output, used for the dot scale.
+        /// \param requantize Forwarded to get_dot_scale and to the created op; defaults to
+        ///        true.
+        /// \param with_relu Forwarded to the created op; also selects u8 (true) or i8 (false)
+        ///        as the output type given to get_dot_scale. Defaults to false.
+        /// \return Shared pointer to the created op::QuantizedDotBias node.
+        std::shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
+                                                      const Output<Node>& filters,
+                                                      const Output<Node>& bias,
+                                                      const Output<Node>& min_input,
+                                                      const Output<Node>& max_input,
+                                                      const Output<Node>& min_filter,
+                                                      const Output<Node>& max_filter,
+                                                      const Output<Node>& min_output,
+                                                      const Output<Node>& max_output,
+                                                      const bool requantize = true,
+                                                      const bool with_relu = false);
    }
 }
--- a/src/ngraph/ngraph.hpp
+++ b/src/ngraph/ngraph.hpp
@@ -62,8 +62,12 @@ namespace ngraph
 ///        recipes, for example auto-broadcast.

 #include "ngraph/builder/autobroadcast.hpp"
+#include "ngraph/builder/dequantize_builder.hpp"
 #include "ngraph/builder/numpy_transpose.hpp"
+#include "ngraph/builder/quantize_builder.hpp"
+#include "ngraph/builder/quantized_concat_builder.hpp"
 #include "ngraph/builder/quantized_conv_builder.hpp"
+#include "ngraph/builder/quantized_dot_builder.hpp"
 #include "ngraph/builder/reduce_ops.hpp"
 #include "ngraph/builder/reshape.hpp"
 #include "ngraph/builder/tensor_mask.hpp"

--- a/test/builder_quantization.cpp
+++ b/test/builder_quantization.cpp