Commit f3b9389c authored by Nishant Patel, committed by Scott Cyphers

Segregate the quant builders op wise (#3501)

* Segregate builders op wise

* Style

* Update ngraph.hpp
parent 98205845
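For call sites migrating from the old monolithic header, the rename is mechanical: the Scaled* entry points in builder/quantization.hpp become per-op *Builder functions in their own headers (ScaledQuantize -> QuantizeBuilder, ScaledDequantize -> DequantizeBuilder, ScaledQuantizedConcat -> QuantizedConcatBuilder, and likewise for the convolution and dot variants), and the quantization_util namespace becomes quantization_utils. A sketch of one such migration, where x, min, max, axes, and round_mode stand in for whatever nodes and settings a caller already has:

// Before (builder/quantization.hpp):
//   auto q = ngraph::builder::ScaledQuantize(
//       x, min, max, element::u8, axes, round_mode);
// After (builder/quantize_builder.hpp):
auto q = ngraph::builder::QuantizeBuilder(
    x, min, max, element::u8, axes, round_mode);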
@@ -24,20 +24,23 @@ set (SRC
axis_vector.hpp
builder/autobroadcast.cpp
builder/autobroadcast.hpp
builder/dequantize_builder.cpp
builder/dequantize_builder.hpp
builder/make_constant.hpp
builder/norm.cpp
builder/norm.hpp
builder/numpy_transpose.cpp
builder/numpy_transpose.hpp
builder/quantization.cpp
builder/quantization.hpp
builder/quantize_builder.cpp
builder/quantize_builder.hpp
builder/quantized_concat_builder.cpp
builder/quantized_concat_builder.hpp
builder/quantized_conv_builder.cpp
builder/quantized_conv_builder.hpp
builder/quantized_dot_builder.cpp
builder/quantized_dot_builder.hpp
builder/quantization/quantized_linear_convolution.cpp
builder/quantization/quantized_linear_convolution.hpp
builder/quantization_util.hpp
builder/quantization_utils.hpp
builder/quantization_utils.cpp
builder/reduce_ops.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/dequantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes)
{
auto quant_type = input.get_element_type();
if (min.get_element_type() != real_type)
{
throw ngraph_error("DequantizeBuilder: min must match input type");
}
if (max.get_element_type() != real_type)
{
throw ngraph_error("DequantizeBuilder: max must match input type");
}
auto shape = min.get_shape();
if (shape != max.get_shape())
{
throw ngraph_error("DequantizeBuilder: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_utils::get_scale(min, max, quant_type);
return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
}
}
}
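A minimal usage sketch of the new builder (editorial, not part of the commit): dequantizing an i8 tensor back to f32 with scalar range nodes. Shapes and values are illustrative, and the helper name is hypothetical.

#include <memory>
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

std::shared_ptr<Node> dequantize_example() // hypothetical helper
{
    auto input = std::make_shared<op::Parameter>(element::i8, Shape{1, 3, 2, 2});
    auto min = op::Constant::create(element::f32, Shape{}, {-127.0f});
    auto max = op::Constant::create(element::f32, Shape{}, {127.0f});
    // Builds an op::Dequantize whose scale is derived from [min, max];
    // the zero offset is synthesized internally.
    return builder::DequantizeBuilder(input, min, max, element::f32, AxisSet{});
}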
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
std::shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes);
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_util.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode)
{
auto real_type = input.get_element_type();
if (min.get_element_type() != real_type)
{
throw ngraph_error("ScaledQuantize: min must match input type");
}
if (max.get_element_type() != real_type)
{
throw ngraph_error("ScaledQuantize: max must match input type");
}
auto shape = min.get_shape();
if (shape != max.get_shape())
{
throw ngraph_error("ScaledQuantize: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type, true);
return make_shared<op::Quantize>(input, scale, zero, quant_type, axes, round_mode);
}
shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes)
{
auto quant_type = input.get_element_type();
if (min.get_element_type() != real_type)
{
throw ngraph_error("ScaledDequantize: min must match output type");
}
if (max.get_element_type() != real_type)
{
throw ngraph_error("ScaledDequantize: max must match output type");
}
auto shape = min.get_shape();
if (shape != max.get_shape())
{
throw ngraph_error("ScaledDequantize: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type);
return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
}
shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxs)
{
quantization_util::check_concat(args, mins, maxs);
auto quant_type = args[0]->get_element_type();
// output scale
auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
auto out_scale = quantization_util::get_scale(min, max, quant_type);
NodeVector rescaled_args(args.size());
for (size_t i = 0; i < args.size(); ++i)
{
auto q_type = args[i]->get_element_type();
auto in_scale = make_shared<ngraph::op::Reshape>(
quantization_util::get_scale(mins[i], maxs[i], q_type), AxisVector{0}, Shape{});
auto zero = make_constant(q_type, in_scale->get_shape(), 0);
rescaled_args[i] =
make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
rescaled_args[i] =
make_shared<op::Quantize>(rescaled_args[i],
out_scale,
zero,
q_type,
AxisSet{},
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
return make_shared<op::Concat>(rescaled_args, concatenation_axis);
}
shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto requantization_scale = quantization_util::get_scale(
min_input, max_input, min_filter, max_filter, min_output, max_output, output_et);
auto mybias = bias;
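// If the bias arrives as f32, quantize it to i32 (the convolution's
// accumulator type) using a scale derived from the input and filter ranges.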
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBias>(input,
filters,
mybias,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
with_relu);
}
shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output)
{
auto requantization_scale = quantization_util::get_scale(
min_input, max_input, min_filter, max_filter, min_output, max_output, element::u8);
return make_shared<op::QuantizedConvolutionRelu>(input,
filters,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale);
}
shared_ptr<Node> ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto requantization_scale = quantization_util::get_scale(
min_input, max_input, min_filter, max_filter, min_output, max_output, output_et);
auto sum_scale = builder::quantization_util::get_sum_scale(
min_output, max_output, min_sum_input, max_sum_input);
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBiasAdd>(input,
filters,
mybias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
}
shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto requantization_scale = quantization_util::get_scale(
min_input, max_input, min_filter, max_filter, min_output, max_output, output_et);
auto sum_scale = builder::quantization_util::get_sum_scale(
min_output, max_output, min_sum_input, max_sum_input);
if (output_et == element::u8)
{
// Need to multiply by two to account for u8 requantization_scale
auto two = make_constant(element::f32, sum_scale->get_shape(), 2.0f);
sum_scale = two * sum_scale;
}
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
auto qconv = make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
filters,
mybias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
return make_shared<op::Convert>(qconv, element::u8);
}
shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize,
const bool with_relu)
{
auto requantization_scale =
quantization_util::get_dot_scale(min_input,
max_input,
min_filter,
max_filter,
min_output,
max_output,
input.get_element_type(),
with_relu ? element::u8 : element::i8,
requantize);
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedDotBias>(
input, filters, mybias, requantization_scale, requantize, with_relu);
}
} // namespace builder
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
namespace ngraph
{
namespace builder
{
std::shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
std::shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes);
std::shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxes);
std::shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
std::shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
std::shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
} // namespace builder
} // namespace ngraph
@@ -17,7 +17,6 @@
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <limits>
#include <vector>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace builder
{
namespace quantization_util
{
std::shared_ptr<Node> max_abs(Output<Node> a, Output<Node> b)
{
auto abs_a = std::make_shared<op::Abs>(a);
auto abs_b = std::make_shared<op::Abs>(b);
return std::make_shared<op::Maximum>(abs_a, abs_b);
}
std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>
quantization_range_for_multiplication(Output<Node> min_a,
Output<Node> max_a,
Output<Node> min_b,
Output<Node> max_b)
{
auto type = min_a.get_element_type();
if (type != max_a.get_element_type() || type != min_b.get_element_type() ||
type != max_b.get_element_type())
{
throw ngraph_error(
"quantization_range_for_multiplication: min and max must have same type");
}
auto shape = min_a.get_shape();
if (shape != max_a.get_shape() || shape != min_b.get_shape() ||
shape != max_b.get_shape())
{
throw ngraph_error(
"quantization_range_for_multiplication: min and max must have same shape");
}
auto u8_range = make_constant(type,
shape,
std::numeric_limits<uint8_t>::max() -
std::numeric_limits<uint8_t>::min());
auto i8_range = make_constant(type,
shape,
std::numeric_limits<int8_t>::max() -
std::numeric_limits<int8_t>::min());
auto a_one_quant_level = (max_a - min_a) / u8_range;
auto b_one_quant_level = (max_b - min_b) / i8_range;
auto c_one_quant_level = a_one_quant_level * b_one_quant_level;
auto i32_min = make_constant(type, shape, std::numeric_limits<int32_t>::min());
auto i32_max = make_constant(type, shape, std::numeric_limits<int32_t>::max());
auto min_c = c_one_quant_level * i32_min;
auto max_c = c_one_quant_level * i32_max;
return std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>(min_c, max_c);
}
std::shared_ptr<Node> get_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& output_type)
{
auto type = min_input.get_element_type();
if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
type != max_filter.get_element_type() ||
type != min_freezed_output.get_element_type() ||
type != max_freezed_output.get_element_type())
{
throw ngraph_error("get_scale: min and max must have same type");
}
auto shape = min_input.get_shape();
if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
shape != max_freezed_output.get_shape())
{
throw ngraph_error("get_scale: min and max must have same shape");
}
auto ranges = quantization_range_for_multiplication(
min_input, max_input, min_filter, max_filter);
auto min_out_value = ranges.first;
auto max_out_value = ranges.second;
auto max_abs32 = max_abs(min_out_value, max_out_value);
auto max_abs8 = max_abs(min_freezed_output, max_freezed_output);
// The output of int8 convolution is accumulated in int32.
// Mkldnn needs a scale to requantize the output back to {u}int8, depending
// on whether relu is fused or not.
// Equation to go from f32 to s32; std::pow(2, 31) / max_abs32 can be thought
// of as the scale used for the quantization:
// 1. s32 = f32 * std::pow(2, 31) / max_abs32
// Equation to go from f32 to u8:
// 2. u8 = f32 * std::pow(2, 8) / max_abs8
// Equation to go from f32 to s8:
// 3. s8 = f32 * std::pow(2, 7) / max_abs8
// Substituting f32 from eq 1 into eq 2:
// 4. u8 = s32 * std::pow(2, -23) * max_abs32 / max_abs8
// Substituting f32 from eq 1 into eq 3:
// 5. s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8
return make_constant(
type, shape, std::pow(2, (output_type == element::i8) ? -24 : -23)) *
(max_abs32 / max_abs8);
}
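// Editorial check of eq 4, using the equations above: substituting
// f32 = max_abs8 (the largest magnitude the output range holds) into eq 1
// gives s32 = max_abs8 * std::pow(2, 31) / max_abs32; applying eq 4 then
// yields u8 = s32 * std::pow(2, -23) * max_abs32 / max_abs8 = std::pow(2, 8),
// the top of the u8 range, which confirms the 2^-23 factor (and, via eq 5,
// the 2^-24 factor for s8).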
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter)
{
auto type = min_input.get_element_type();
if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
type != max_filter.get_element_type())
{
throw ngraph_error("get_bias_scale: min and max must have same type");
}
auto shape = min_input.get_shape();
if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
shape != max_filter.get_shape())
{
throw ngraph_error("get_bias_scale: min and max must have same shape");
}
auto max_abs_input_range = max_abs(min_input, max_input);
auto max_abs_filter_range = max_abs(min_filter, max_filter);
auto range = make_constant(type,
shape,
std::numeric_limits<uint8_t>::max() *
std::numeric_limits<int8_t>::max());
// The scale calculation is inverted here because the Quantize op divides by
// the scale (i.e. applies it as 1/scale).
return (max_abs_input_range * max_abs_filter_range) / range;
}
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
Output<Node> max_freezed_output_conv_1,
Output<Node> min_freezed_output_conv_2,
Output<Node> max_freezed_output_conv_2)
{
auto type = min_freezed_output_conv_1.get_element_type();
if (type != max_freezed_output_conv_1.get_element_type() ||
type != min_freezed_output_conv_2.get_element_type() ||
type != max_freezed_output_conv_2.get_element_type())
{
throw ngraph_error("get_sum_scale: min and max must have same type");
}
auto shape = min_freezed_output_conv_1.get_shape();
if (shape != max_freezed_output_conv_1.get_shape() ||
shape != min_freezed_output_conv_2.get_shape() ||
shape != max_freezed_output_conv_2.get_shape())
{
throw ngraph_error("get_sum_scale: min and max must have same shape");
}
auto max_abs_conv_1 = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
auto max_abs_conv_2 = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
return max_abs_conv_2 / max_abs_conv_1;
}
std::shared_ptr<Node> get_scale(Output<Node> input_min_range,
Output<Node> input_max_range,
const ngraph::element::Type& quant_type,
bool bump_by_eps = false)
{
auto type = input_min_range.get_element_type();
if (type != input_max_range.get_element_type())
{
throw ngraph_error("get_scale: min and max must have same type");
}
auto shape = input_min_range.get_shape();
if (shape != input_max_range.get_shape())
{
throw ngraph_error("get_scale: min and max must have same shape");
}
auto min_range = input_min_range;
auto max_range = input_max_range;
if (bump_by_eps)
{
auto zero = make_constant(type, shape, 0);
min_range = std::make_shared<op::Minimum>(zero, input_min_range);
auto max_abs_input_range = max_abs(input_min_range, input_max_range);
auto one = make_constant(type, shape, 1);
auto hundred = make_constant(type, shape, 100);
auto epsilon =
std::make_shared<op::Maximum>(one, max_abs_input_range) / hundred;
max_range = std::make_shared<op::Maximum>(input_max_range, min_range + epsilon);
max_range = std::make_shared<op::Maximum>(zero, max_range);
}
size_t bw = quant_type.bitwidth();
float range = static_cast<float>(
(quant_type.is_signed() ? std::pow(2, (bw - 1)) : std::pow(2, bw)) - 1);
auto max_abs_range = max_abs(min_range, max_range);
auto target_range = make_constant(type, shape, range);
return max_abs_range / target_range;
}
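// Editorial worked instance of the eps bump above: for a degenerate range
// min = max = 0, min_range stays 0 and max_abs_input_range is 0, so
// epsilon = max(1, 0) / 100 = 0.01 and max_range becomes
// max(0, 0 + 0.01) = 0.01. The returned scale is then 0.01 / target_range
// rather than 0, which would make a downstream Quantize divide by zero.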
void
check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
{
auto size = args.size();
if (size != mins.size() || size != maxs.size())
{
throw ngraph_error("Min and Max node vectors must be of same length");
}
for (size_t i = 0; i < size; i++)
{
auto min = mins[i];
auto max = maxs[i];
auto type = min->get_element_type();
if (type != max->get_element_type())
{
throw ngraph_error("check_concat: min and max must have same type");
}
if (min->get_shape() != Shape{1} || max->get_shape() != Shape{1})
{
throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
vector_to_string(min->get_shape()) +
vector_to_string(max->get_shape()));
}
}
}
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& input_type,
const ngraph::element::Type& output_type,
const bool requantize = true)
{
auto type = min_input.get_element_type();
if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
type != max_filter.get_element_type() ||
type != min_freezed_output.get_element_type() ||
type != max_freezed_output.get_element_type())
{
throw ngraph_error("get_dot_scale: min and max must have same type");
}
auto shape = min_input.get_shape();
if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
shape != max_freezed_output.get_shape())
{
throw ngraph_error("get_dot_scale: min and max must have same shape");
}
auto data_scale = get_scale(min_input, max_input, input_type);
auto weight_scale = get_scale(min_filter, max_filter, element::i8);
auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);
if (requantize)
{
return data_scale * weight_scale / out_scale;
}
else
{
return data_scale * weight_scale;
}
}
} // namespace quantization_util
} // namespace builder
} // namespace ngraph
@@ -74,6 +74,128 @@ namespace ngraph
return max_abs_range / target_range;
}
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter)
{
auto type = min_input.get_element_type();
if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
type != max_filter.get_element_type())
{
throw ngraph_error("get_bias_scale: min and max must have same type");
}
auto shape = min_input.get_shape();
if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
shape != max_filter.get_shape())
{
throw ngraph_error("get_bias_scale: min and max must have same shape");
}
auto max_abs_input_range = max_abs(min_input, max_input);
auto max_abs_filter_range = max_abs(min_filter, max_filter);
auto range = make_constant(type,
shape,
std::numeric_limits<uint8_t>::max() *
std::numeric_limits<int8_t>::max());
// The scale calculation is inverted here because the Quantize op divides by
// the scale (i.e. applies it as 1/scale).
return (max_abs_input_range * max_abs_filter_range) / range;
}
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
Output<Node> max_freezed_output_conv_1,
Output<Node> min_freezed_output_conv_2,
Output<Node> max_freezed_output_conv_2)
{
auto type = min_freezed_output_conv_1.get_element_type();
if (type != max_freezed_output_conv_1.get_element_type() ||
type != min_freezed_output_conv_2.get_element_type() ||
type != max_freezed_output_conv_2.get_element_type())
{
throw ngraph_error("get_sum_scale: min and max must have same type");
}
auto shape = min_freezed_output_conv_1.get_shape();
if (shape != max_freezed_output_conv_1.get_shape() ||
shape != min_freezed_output_conv_2.get_shape() ||
shape != max_freezed_output_conv_2.get_shape())
{
throw ngraph_error("get_sum_scale: min and max must have same shape");
}
auto max_abs_conv_1 = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
auto max_abs_conv_2 = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
return max_abs_conv_2 / max_abs_conv_1;
}
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& input_type,
const ngraph::element::Type& output_type,
const bool requantize)
{
auto type = min_input.get_element_type();
if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
type != max_filter.get_element_type() ||
type != min_freezed_output.get_element_type() ||
type != max_freezed_output.get_element_type())
{
throw ngraph_error("get_dot_scale: min and max must have same type");
}
auto shape = min_input.get_shape();
if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
shape != max_freezed_output.get_shape())
{
throw ngraph_error("get_dot_scale: min and max must have same shape");
}
auto data_scale = get_scale(min_input, max_input, input_type);
auto weight_scale = get_scale(min_filter, max_filter, element::i8);
auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);
if (requantize)
{
return data_scale * weight_scale / out_scale;
}
else
{
return data_scale * weight_scale;
}
}
void
check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
{
auto size = args.size();
if (size != mins.size() || size != maxs.size())
{
throw ngraph_error("Min and Max node vectors must be of same length");
}
for (size_t i = 0; i < size; i++)
{
auto min = mins[i];
auto max = maxs[i];
auto type = min->get_element_type();
if (type != max->get_element_type())
{
throw ngraph_error("check_concat: min and max must have same type");
}
if (min->get_shape() != Shape{1} || max->get_shape() != Shape{1})
{
throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
vector_to_string(min->get_shape()) +
vector_to_string(max->get_shape()));
}
}
}
}
}
}
@@ -43,6 +43,30 @@ namespace ngraph
const Output<Node>& input_max_range,
const ngraph::element::Type& quant_type,
bool bump_by_eps = false);
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter);
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
Output<Node> max_freezed_output_conv_1,
Output<Node> min_freezed_output_conv_2,
Output<Node> max_freezed_output_conv_2);
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& input_type,
const ngraph::element::Type& output_type,
const bool requantize = true);
void check_concat(const NodeVector& args,
const NodeVector& mins,
const NodeVector& maxs);
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode)
{
auto real_type = input.get_element_type();
if (min.get_element_type() != real_type)
{
throw ngraph_error("QuantizeBuilder: min must match input type");
}
if (max.get_element_type() != real_type)
{
throw ngraph_error("QuantizeBuilder: max must match input type");
}
auto shape = min.get_shape();
if (shape != max.get_shape())
{
throw ngraph_error("QuantizeBuilder: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_utils::get_scale(min, max, quant_type, true);
return make_shared<op::Quantize>(input, scale, zero, quant_type, axes, round_mode);
}
}
}
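A minimal usage sketch (editorial, not part of the commit): quantizing an f32 tensor to u8 with scalar range nodes. Values, shapes, and the helper name are illustrative.

#include <memory>
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

std::shared_ptr<Node> quantize_example() // hypothetical helper
{
    auto input = std::make_shared<op::Parameter>(element::f32, Shape{1, 3, 2, 2});
    auto min = op::Constant::create(element::f32, Shape{}, {0.0f});
    auto max = op::Constant::create(element::f32, Shape{}, {255.0f});
    // get_scale is invoked with bump_by_eps = true, so a degenerate
    // [min, max] range is widened slightly instead of yielding a zero scale.
    return builder::QuantizeBuilder(
        input, min, max, element::u8, AxisSet{},
        op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}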
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
std::shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantized_concat_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxs)
{
quantization_utils::check_concat(args, mins, maxs);
auto quant_type = args[0]->get_element_type();
// output scale
auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
auto out_scale = quantization_utils::get_scale(min, max, quant_type);
NodeVector rescaled_args(args.size());
for (size_t i = 0; i < args.size(); ++i)
{
auto q_type = args[i]->get_element_type();
auto in_scale = make_shared<ngraph::op::Reshape>(
quantization_utils::get_scale(mins[i], maxs[i], q_type),
AxisVector{0},
Shape{});
auto zero = make_constant(q_type, in_scale->get_shape(), 0);
rescaled_args[i] =
make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
rescaled_args[i] =
make_shared<op::Quantize>(rescaled_args[i],
out_scale,
zero,
q_type,
AxisSet{},
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
return make_shared<op::Concat>(rescaled_args, concatenation_axis);
}
}
}
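Since check_concat requires every min/max node to have Shape{1}, the sketch below (editorial, with illustrative values and a hypothetical helper name) uses one-element constants. Each argument is dequantized with its own scale and requantized to the shared output scale before the concat.

#include <memory>
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

std::shared_ptr<Node> concat_example() // hypothetical helper
{
    auto a = std::make_shared<op::Parameter>(element::u8, Shape{2, 2});
    auto b = std::make_shared<op::Parameter>(element::u8, Shape{2, 2});
    NodeVector mins{op::Constant::create(element::f32, Shape{1}, {0.0f}),
                    op::Constant::create(element::f32, Shape{1}, {0.0f})};
    NodeVector maxs{op::Constant::create(element::f32, Shape{1}, {2.0f}),
                    op::Constant::create(element::f32, Shape{1}, {4.0f})};
    // Concatenate along axis 0; both inputs are rescaled to a common
    // output scale first.
    return builder::QuantizedConcatBuilder(NodeVector{a, b}, 0, mins, maxs);
}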
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
std::shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxs);
}
}
@@ -17,7 +17,6 @@
#include <memory>
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
@@ -74,5 +73,209 @@ namespace ngraph
filter_axes,
output_axes);
}
shared_ptr<Node> QuantizedConvolutionBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto input_scale =
quantization_utils::get_scale(min_input, max_input, input.get_element_type());
auto filter_scale =
quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
auto output_scale = quantization_utils::get_scale(min_output, max_output, output_et);
auto requantization_scale = input_scale * filter_scale / output_scale;
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale = quantization_utils::get_bias_scale(
min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBias>(input,
filters,
mybias,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
with_relu);
}
shared_ptr<Node> QuantizedConvolutionReluBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output)
{
auto input_scale =
quantization_utils::get_scale(min_input, max_input, input.get_element_type());
auto filter_scale =
quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
auto output_scale = quantization_utils::get_scale(min_output, max_output, element::u8);
auto requantization_scale = input_scale * filter_scale / output_scale;
return make_shared<op::QuantizedConvolutionRelu>(input,
filters,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale);
}
shared_ptr<Node> QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto input_scale =
quantization_utils::get_scale(min_input, max_input, input.get_element_type());
auto filter_scale =
quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
auto output_scale = quantization_utils::get_scale(min_output, max_output, output_et);
auto requantization_scale = input_scale * filter_scale / output_scale;
auto sum_scale = builder::quantization_utils::get_sum_scale(
min_output, max_output, min_sum_input, max_sum_input);
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale = quantization_utils::get_bias_scale(
min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBiasAdd>(input,
filters,
mybias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
}
shared_ptr<Node>
QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto input_scale =
quantization_utils::get_scale(min_input, max_input, input.get_element_type());
auto filter_scale =
quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
auto output_scale = quantization_utils::get_scale(min_output, max_output, output_et);
auto requantization_scale = input_scale * filter_scale / output_scale;
auto sum_scale = builder::quantization_utils::get_sum_scale(
min_output, max_output, min_sum_input, max_sum_input);
if (output_et == element::u8)
{
// Need to multiply by two to account for u8 requantization_scale
auto two = make_constant(element::f32, sum_scale->get_shape(), 2.0f);
sum_scale = two * sum_scale;
}
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale = quantization_utils::get_bias_scale(
min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
auto qconv = make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
filters,
mybias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
return make_shared<op::Convert>(qconv, element::u8);
}
}
}
@@ -18,6 +18,11 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_convolution.hpp"
#include "quantization_utils.hpp"
@@ -43,5 +48,77 @@ namespace ngraph
const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
const ngraph::AxisSet& output_axes = ngraph::AxisSet{});
std::shared_ptr<Node>
QuantizedConvolutionBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
std::shared_ptr<Node>
QuantizedConvolutionReluBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
std::shared_ptr<Node>
QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
std::shared_ptr<Node>
QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
}
}
@@ -17,7 +17,6 @@
#include <memory>
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
@@ -65,5 +64,45 @@ namespace ngraph
input1_axes,
output_axes);
}
shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize,
const bool with_relu)
{
auto requantization_scale =
quantization_utils::get_dot_scale(min_input,
max_input,
min_filter,
max_filter,
min_output,
max_output,
input.get_element_type(),
with_relu ? element::u8 : element::i8,
requantize);
auto mybias = bias;
if (bias.get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input.get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale = quantization_utils::get_bias_scale(
min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
mybias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedDotBias>(
input, filters, mybias, requantization_scale, requantize, with_relu);
}
}
}
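A minimal sketch of the dot builder (editorial, not part of the commit): an f32 bias is quantized to i32 on the fly, and requantize/with_relu keep their declared defaults of true/false. The shapes assume an inner-product layout with filters as {out, in}; that layout, the values, and the helper name are illustrative assumptions, so adjust to the op's actual expectations.

#include <memory>
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/parameter.hpp"

using namespace ngraph;

std::shared_ptr<Node> dot_bias_example() // hypothetical helper
{
    auto input = std::make_shared<op::Parameter>(element::u8, Shape{1, 2});
    auto filters = std::make_shared<op::Parameter>(element::i8, Shape{3, 2}); // assumed {out, in}
    auto bias = std::make_shared<op::Parameter>(element::f32, Shape{3});
    auto min_in = op::Constant::create(element::f32, Shape{}, {0.0f});
    auto max_in = op::Constant::create(element::f32, Shape{}, {255.0f});
    auto min_f = op::Constant::create(element::f32, Shape{}, {-127.0f});
    auto max_f = op::Constant::create(element::f32, Shape{}, {127.0f});
    auto min_out = op::Constant::create(element::f32, Shape{}, {0.0f});
    auto max_out = op::Constant::create(element::f32, Shape{}, {255.0f});
    return builder::QuantizedDotBiasBuilder(
        input, filters, bias, min_in, max_in, min_f, max_f, min_out, max_out);
}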
@@ -18,6 +18,9 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_dot.hpp"
#include "quantization_utils.hpp"
@@ -38,5 +41,17 @@ namespace ngraph
const ngraph::AxisSet& input0_axes,
const ngraph::AxisSet& input1_axes,
const ngraph::AxisSet& output_axes);
std::shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
}
}
@@ -62,8 +62,12 @@ namespace ngraph
/// recipes, for example auto-broadcast.
#include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/numpy_transpose.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/builder/reshape.hpp"
#include "ngraph/builder/tensor_mask.hpp"
......
@@ -21,8 +21,10 @@
#include <string>
#include "gtest/gtest.h"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/ngraph.hpp"
@@ -61,19 +63,19 @@ TEST(builder, scaled_QC_with_relu)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionRelu(A,
B,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto CV = ngraph::builder::QuantizedConvolutionReluBuilder(A,
B,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
@@ -103,19 +105,19 @@ TEST(builder, dynamic_scaled_QC_with_relu)
auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionRelu(A,
B,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto CV = ngraph::builder::QuantizedConvolutionReluBuilder(A,
B,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, D, E, F, G, H});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
@@ -158,20 +160,20 @@ TEST(builder, scaled_QC_with_bias)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {22.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {90.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
@@ -185,7 +187,7 @@ TEST(builder, scaled_QC_with_bias)
auto result = backend->create_tensor(element::i8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c});
EXPECT_EQ((vector<int8_t>{38, 55, 50, 52, 61, 109, 127, 68, 54, 81, 68, 62}),
EXPECT_EQ((vector<int8_t>{38, 55, 49, 52, 61, 109, 127, 68, 54, 80, 68, 62}),
read_vector<int8_t>(result));
}
......@@ -206,20 +208,20 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
......@@ -244,7 +246,7 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto result = backend->create_tensor(element::i8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i});
EXPECT_EQ((vector<int8_t>{38, 55, 50, 52, 61, 109, 127, 68, 54, 81, 68, 62}),
EXPECT_EQ((vector<int8_t>{38, 55, 49, 52, 61, 109, 127, 68, 54, 80, 68, 62}),
read_vector<int8_t>(result));
}
......@@ -265,21 +267,21 @@ TEST(builder, scaled_QC_with_bias_and_relu)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
true);
auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -317,24 +319,25 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {180.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto CV =
ngraph::builder::QuantizedConvolutionBiasAddBuilder(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -350,7 +353,7 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d});
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}),
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 166, 142, 133}),
read_vector<uint8_t>(result));
}
......@@ -375,24 +378,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto I = make_shared<op::Parameter>(element::f32, Shape{1});
auto J = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto CV =
ngraph::builder::QuantizedConvolutionBiasAddBuilder(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV},
ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J});
auto backend = runtime::Backend::create("CPU");
......@@ -424,7 +428,7 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i, j, k, l});
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}),
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 166, 142, 133}),
read_vector<uint8_t>(result));
}
......@@ -449,25 +453,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {90.0f});
auto CV =
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -511,25 +515,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu_nhwc)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {90.0f});
auto CV =
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A_reshape,
B_reshape,
Bias,
Add_reshape,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
A_reshape,
B_reshape,
Bias,
Add_reshape,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -570,25 +574,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_signed_add_and_relu)
auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto I = make_shared<op::Parameter>(element::f32, Shape{1});
auto J = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV =
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV},
ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J});
auto backend = runtime::Backend::create("CPU");
......@@ -641,21 +645,21 @@ TEST(builder, scaled_QC_with_f32_bias_and_relu)
auto F = op::Constant::create(element::f32, Shape{}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
true);
auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B,
Bias,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -681,7 +685,7 @@ TEST(builder, scaled_Q_unsigned)
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = op::Constant::create(element::f32, Shape{}, {-255.0f});
auto C = op::Constant::create(element::f32, Shape{}, {127.0f});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::u8, quantization_axes, round_mode);
auto QT = ngraph::builder::QuantizeBuilder(A, B, C, element::u8, quantization_axes, round_mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -706,7 +710,7 @@ TEST(builder, dynamic_scaled_Q)
auto A = make_shared<op::Parameter>(element::f32, in_shape);
auto B = make_shared<op::Parameter>(element::f32, Shape{});
auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, type, AxisSet{}, mode);
auto QT = ngraph::builder::QuantizeBuilder(A, B, C, type, AxisSet{}, mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A, B, C});
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, in_shape);
......@@ -772,7 +776,7 @@ TEST(builder, scaled_Q_signed)
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = op::Constant::create(element::f32, Shape{}, {-127.0f});
auto C = op::Constant::create(element::f32, Shape{}, {127.0f});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::i8, quantization_axes, round_mode);
auto QT = ngraph::builder::QuantizeBuilder(A, B, C, element::i8, quantization_axes, round_mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -792,7 +796,7 @@ TEST(builder, scaled_DQ_signed)
auto A = make_shared<op::Parameter>(element::i8, Shape{1});
auto B = op::Constant::create(element::f32, Shape{}, {-1.0f});
auto C = op::Constant::create(element::f32, Shape{}, {300.0f});
auto r = ngraph::builder::ScaledDequantize(A, B, C, element::f32, quantization_axes);
auto r = ngraph::builder::DequantizeBuilder(A, B, C, element::f32, quantization_axes);
auto f = make_shared<Function>(r, ParameterVector{A});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
......@@ -816,7 +820,7 @@ shared_ptr<runtime::Tensor> call_SDQ(shared_ptr<runtime::Backend>& backend,
auto A = make_shared<op::Parameter>(type, in_shape);
auto B = make_shared<op::Parameter>(element::f32, Shape{});
auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto DQT = ngraph::builder::ScaledDequantize(A, B, C, element::f32, AxisSet{});
auto DQT = ngraph::builder::DequantizeBuilder(A, B, C, element::f32, AxisSet{});
auto f = make_shared<Function>(NodeVector{DQT}, ParameterVector{A, B, C});
// Create some tensors for input/output
auto a = backend->create_tensor(type, in_shape);
......@@ -857,7 +861,7 @@ TEST(builder, scaled_quantize_concat_unsigned)
auto Cn = make_shared<op::Parameter>(element::f32, Shape{1});
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{8, 2};
auto QConcat = ngraph::builder::ScaledQuantizedConcat(
auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
......@@ -906,7 +910,7 @@ TEST(builder, scaled_quantize_concat_signed)
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{8, 2};
auto QConcat = ngraph::builder::ScaledQuantizedConcat(
auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
......@@ -954,7 +958,7 @@ TEST(builder, scaled_quantize_concat_unsigned_varying)
auto Cn = make_shared<op::Parameter>(element::f32, Shape{1});
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{2, 9};
auto QConcat = ngraph::builder::ScaledQuantizedConcat(
auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 1, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
......@@ -1009,7 +1013,7 @@ TEST(builder, dynamic_scaled_QD_with_bias)
auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedDotBias(
auto CV = ngraph::builder::QuantizedDotBiasBuilder(
A, B, Bias, C, D, E, F, G, H, requantize, with_relu);
return make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H});
};
......
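For reference, a minimal sketch of the renamed entry points outside the test harness. The call signatures mirror the tests above; the shape, the min/max range constants, and the round mode chosen here are illustrative assumptions, not values taken from this commit.

// Sketch only: round-trip a tensor through the renamed builders.
// QuantizeBuilder/DequantizeBuilder signatures follow the usage in the
// tests above; concrete shapes and ranges below are assumptions.
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/ngraph.hpp"

using namespace ngraph;

std::shared_ptr<Function> make_quantize_roundtrip()
{
    auto input = std::make_shared<op::Parameter>(element::f32, Shape{2, 2});
    // Per-tensor range; scalar min/max as in the scaled_Q_signed test.
    auto min = op::Constant::create(element::f32, Shape{}, {-127.0f});
    auto max = op::Constant::create(element::f32, Shape{}, {127.0f});
    auto round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
    // f32 -> i8, scale derived from min/max; empty axis set = per-tensor.
    auto q = builder::QuantizeBuilder(input, min, max, element::i8, AxisSet{}, round_mode);
    // i8 -> f32 over the same range.
    auto dq = builder::DequantizeBuilder(q, min, max, element::f32, AxisSet{});
    return std::make_shared<Function>(NodeVector{dq}, ParameterVector{input});
}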