Commit f3b9389c authored by Nishant Patel, committed by Scott Cyphers

Segregate the quant builders op wise (#3501)

* Segregate builders op wise

* Style

* Update ngraph.hpp
parent 98205845
...@@ -24,20 +24,23 @@ set (SRC ...@@ -24,20 +24,23 @@ set (SRC
axis_vector.hpp axis_vector.hpp
builder/autobroadcast.cpp builder/autobroadcast.cpp
builder/autobroadcast.hpp builder/autobroadcast.hpp
builder/dequantize_builder.cpp
builder/dequantize_builder.hpp
builder/make_constant.hpp builder/make_constant.hpp
builder/norm.cpp builder/norm.cpp
builder/norm.hpp builder/norm.hpp
builder/numpy_transpose.cpp builder/numpy_transpose.cpp
builder/numpy_transpose.hpp builder/numpy_transpose.hpp
builder/quantization.cpp builder/quantize_builder.cpp
builder/quantization.hpp builder/quantize_builder.hpp
builder/quantized_concat_builder.cpp
builder/quantized_concat_builder.hpp
builder/quantized_conv_builder.cpp builder/quantized_conv_builder.cpp
builder/quantized_conv_builder.hpp builder/quantized_conv_builder.hpp
builder/quantized_dot_builder.cpp builder/quantized_dot_builder.cpp
builder/quantized_dot_builder.hpp builder/quantized_dot_builder.hpp
builder/quantization/quantized_linear_convolution.cpp builder/quantization/quantized_linear_convolution.cpp
builder/quantization/quantized_linear_convolution.hpp builder/quantization/quantized_linear_convolution.hpp
builder/quantization_util.hpp
builder/quantization_utils.hpp builder/quantization_utils.hpp
builder/quantization_utils.cpp builder/quantization_utils.cpp
builder/reduce_ops.cpp builder/reduce_ops.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/dequantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
// Build an op::Dequantize node whose scale is computed from a [min, max] range.
//
// input     - value to dequantize; its element type is taken as the quantized type
// min, max  - range bounds; both must have element type real_type and equal shapes
// real_type - element type of the dequantized result
// axes      - quantization axes, forwarded unchanged to op::Dequantize
//
// Throws ngraph_error when min/max have the wrong element type or differ in shape.
shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
                                   const Output<Node>& min,
                                   const Output<Node>& max,
                                   const ngraph::element::Type& real_type,
                                   const ngraph::AxisSet& axes)
{
    auto quant_type = input.get_element_type();

    // The bounds are validated against real_type, which is the *output* type of
    // the dequantization, so the diagnostics name the output type.
    if (min.get_element_type() != real_type)
    {
        throw ngraph_error("DequantizeBuilder: min must match output type");
    }
    if (max.get_element_type() != real_type)
    {
        throw ngraph_error("DequantizeBuilder: max must match output type");
    }

    auto shape = min.get_shape();
    if (shape != max.get_shape())
    {
        throw ngraph_error("DequantizeBuilder: min and max must have same shape");
    }

    // The zero point is a constant 0 of the quantized type, shaped like min/max.
    auto zero = make_constant(quant_type, shape, 0);
    auto scale = quantization_utils::get_scale(min, max, quant_type);
    return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
// Build an op::Dequantize node from a quantized input and a [min, max] range.
//
// input     - quantized value; its element type is used as the quantized type
// min, max  - range bounds; must both have element type real_type and the same shape
// real_type - element type of the dequantized output
// axes      - quantization axes passed through to op::Dequantize
//
// Throws ngraph_error if min/max do not have type real_type or differ in shape.
std::shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes);
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_util.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
// Build an op::Quantize node whose scale is derived from a [min, max] range.
// min and max must share the input's element type and have equal shapes,
// otherwise ngraph_error is thrown. The zero point is a constant 0 of quant_type.
shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
                                const Output<Node>& min,
                                const Output<Node>& max,
                                const ngraph::element::Type& quant_type,
                                const ngraph::AxisSet& axes,
                                op::Quantize::RoundMode round_mode)
{
    const auto input_et = input.get_element_type();
    if (min.get_element_type() != input_et)
    {
        throw ngraph_error("ScaledQuantize: min must match input type");
    }
    if (max.get_element_type() != input_et)
    {
        throw ngraph_error("ScaledQuantize: max must match input type");
    }

    const auto range_shape = min.get_shape();
    if (range_shape != max.get_shape())
    {
        throw ngraph_error("ScaledQuantize: min and max must have same shape");
    }

    const auto zero_point = make_constant(quant_type, range_shape, 0);
    // The trailing 'true' enables get_scale's bump_by_eps range adjustment.
    const auto quant_scale = quantization_util::get_scale(min, max, quant_type, true);
    return make_shared<op::Quantize>(
        input, quant_scale, zero_point, quant_type, axes, round_mode);
}
// Build an op::Dequantize node whose scale is derived from a [min, max] range.
// min and max must have element type real_type (the output type) and equal
// shapes, otherwise ngraph_error is thrown. The zero point is a constant 0 of
// the input's (quantized) element type.
shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
                                  const Output<Node>& min,
                                  const Output<Node>& max,
                                  const ngraph::element::Type& real_type,
                                  const ngraph::AxisSet& axes)
{
    const auto input_et = input.get_element_type();
    if (min.get_element_type() != real_type)
    {
        throw ngraph_error("ScaledDequantize: min must match output type");
    }
    if (max.get_element_type() != real_type)
    {
        throw ngraph_error("ScaledDequantize: max must match output type");
    }

    const auto range_shape = min.get_shape();
    if (range_shape != max.get_shape())
    {
        throw ngraph_error("ScaledDequantize: min and max must have same shape");
    }

    const auto zero_point = make_constant(input_et, range_shape, 0);
    const auto dequant_scale = quantization_util::get_scale(min, max, input_et);
    return make_shared<op::Dequantize>(input, dequant_scale, zero_point, real_type, axes);
}
// Concatenate quantized tensors that may each have a different [min, max] range.
//
// Every argument is dequantized to f32 with its own scale and then re-quantized
// with a common output scale, computed from the minimum of all mins and the
// maximum of all maxs, before the final op::Concat along concatenation_axis.
//
// args               - quantized inputs; must not be empty
// concatenation_axis - axis passed to the final op::Concat
// mins, maxs         - per-argument range bounds, validated by check_concat
//
// Throws ngraph_error if check_concat rejects the inputs or args is empty.
shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
                                       size_t concatenation_axis,
                                       const NodeVector& mins,
                                       const NodeVector& maxs)
{
    quantization_util::check_concat(args, mins, maxs);

    // check_concat only verifies that the three vectors have equal length, so an
    // empty args would otherwise reach args[0] below, which is undefined behaviour.
    if (args.empty())
    {
        throw ngraph_error("ScaledQuantizedConcat: args must not be empty");
    }

    auto quant_type = args[0]->get_element_type();

    // output scale
    auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
    auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
    auto out_scale = quantization_util::get_scale(min, max, quant_type);

    NodeVector rescaled_args(args.size());
    for (size_t i = 0; i < args.size(); ++i)
    {
        auto q_type = args[i]->get_element_type();
        // Reshape the per-argument scale to a scalar (Shape{}).
        auto in_scale = make_shared<ngraph::op::Reshape>(
            quantization_util::get_scale(mins[i], maxs[i], q_type), AxisVector{0}, Shape{});
        auto zero = make_constant(q_type, in_scale->get_shape(), 0);

        // Dequantize with the argument's own scale, then quantize with the shared one.
        rescaled_args[i] =
            make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
        rescaled_args[i] =
            make_shared<op::Quantize>(rescaled_args[i],
                                      out_scale,
                                      zero,
                                      q_type,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }
    return make_shared<op::Concat>(rescaled_args, concatenation_axis);
}
// Build an op::QuantizedConvolutionBias node.
// The requantization scale is computed from the input/filter/output ranges for
// a u8 output when with_relu is set and an i8 output otherwise. A bias that is
// not already i32 is quantized to i32 first.
shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
                                                const Output<Node>& filters,
                                                const Output<Node>& bias,
                                                const Strides& window_movement_strides,
                                                const Strides& window_dilation_strides,
                                                const CoordinateDiff& padding_below,
                                                const CoordinateDiff& padding_above,
                                                const Strides& data_dilation_strides,
                                                const Output<Node>& min_input,
                                                const Output<Node>& max_input,
                                                const Output<Node>& min_filter,
                                                const Output<Node>& max_filter,
                                                const Output<Node>& min_output,
                                                const Output<Node>& max_output,
                                                const bool with_relu)
{
    const auto requant_scale =
        quantization_util::get_scale(min_input,
                                     max_input,
                                     min_filter,
                                     max_filter,
                                     min_output,
                                     max_output,
                                     with_relu ? element::u8 : element::i8);

    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 with zero point 0 and no quantization axes.
        const auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        const auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBias>(input,
                                                     filters,
                                                     bias_i32,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale,
                                                     with_relu);
}
// Build an op::QuantizedConvolutionRelu node. The requantization scale is
// computed from the input/filter/output ranges for a u8 output.
shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
                                                const Output<Node>& filters,
                                                const Strides& window_movement_strides,
                                                const Strides& window_dilation_strides,
                                                const CoordinateDiff& padding_below,
                                                const CoordinateDiff& padding_above,
                                                const Strides& data_dilation_strides,
                                                const Output<Node>& min_input,
                                                const Output<Node>& max_input,
                                                const Output<Node>& min_filter,
                                                const Output<Node>& max_filter,
                                                const Output<Node>& min_output,
                                                const Output<Node>& max_output)
{
    const auto requant_scale = quantization_util::get_scale(min_input,
                                                            max_input,
                                                            min_filter,
                                                            max_filter,
                                                            min_output,
                                                            max_output,
                                                            element::u8);

    return make_shared<op::QuantizedConvolutionRelu>(input,
                                                     filters,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale);
}
// Build an op::QuantizedConvolutionBiasAdd node.
// Computes the requantization scale (u8 output when with_relu, i8 otherwise)
// and the sum scale relating the sum input's range to the output range. A bias
// that is not already i32 is quantized to i32 first.
shared_ptr<Node> ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
                                                   const Output<Node>& filters,
                                                   const Output<Node>& bias,
                                                   const Output<Node>& sum_input,
                                                   const Strides& window_movement_strides,
                                                   const Strides& window_dilation_strides,
                                                   const CoordinateDiff& padding_below,
                                                   const CoordinateDiff& padding_above,
                                                   const Strides& data_dilation_strides,
                                                   const Output<Node>& min_input,
                                                   const Output<Node>& max_input,
                                                   const Output<Node>& min_filter,
                                                   const Output<Node>& max_filter,
                                                   const Output<Node>& min_output,
                                                   const Output<Node>& max_output,
                                                   const Output<Node>& min_sum_input,
                                                   const Output<Node>& max_sum_input,
                                                   const bool with_relu)
{
    const auto requant_scale =
        quantization_util::get_scale(min_input,
                                     max_input,
                                     min_filter,
                                     max_filter,
                                     min_output,
                                     max_output,
                                     with_relu ? element::u8 : element::i8);
    const auto add_scale = builder::quantization_util::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);

    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 with zero point 0 and no quantization axes.
        const auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        const auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBiasAdd>(input,
                                                        filters,
                                                        bias_i32,
                                                        sum_input,
                                                        window_movement_strides,
                                                        window_dilation_strides,
                                                        padding_below,
                                                        padding_above,
                                                        data_dilation_strides,
                                                        requant_scale,
                                                        add_scale,
                                                        with_relu);
}
// Build an op::QuantizedConvolutionBiasSignedAdd node and convert its result
// to u8.
// The requantization scale targets u8 when with_relu is set and i8 otherwise;
// in the u8 case the sum scale is doubled. A bias that is not already i32 is
// quantized to i32 first.
shared_ptr<Node>
    ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
                                            const Output<Node>& filters,
                                            const Output<Node>& bias,
                                            const Output<Node>& sum_input,
                                            const Strides& window_movement_strides,
                                            const Strides& window_dilation_strides,
                                            const CoordinateDiff& padding_below,
                                            const CoordinateDiff& padding_above,
                                            const Strides& data_dilation_strides,
                                            const Output<Node>& min_input,
                                            const Output<Node>& max_input,
                                            const Output<Node>& min_filter,
                                            const Output<Node>& max_filter,
                                            const Output<Node>& min_output,
                                            const Output<Node>& max_output,
                                            const Output<Node>& min_sum_input,
                                            const Output<Node>& max_sum_input,
                                            const bool with_relu)
{
    const auto target_et = with_relu ? element::u8 : element::i8;
    const auto requant_scale = quantization_util::get_scale(
        min_input, max_input, min_filter, max_filter, min_output, max_output, target_et);

    auto add_scale = builder::quantization_util::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);
    if (target_et == element::u8)
    {
        // Need to multiply by two to account for u8 requantization_scale
        auto two = make_constant(element::f32, add_scale->get_shape(), 2.0f);
        add_scale = two * add_scale;
    }

    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 with zero point 0 and no quantization axes.
        const auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        const auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    const auto signed_add_conv =
        make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
                                                           filters,
                                                           bias_i32,
                                                           sum_input,
                                                           window_movement_strides,
                                                           window_dilation_strides,
                                                           padding_below,
                                                           padding_above,
                                                           data_dilation_strides,
                                                           requant_scale,
                                                           add_scale,
                                                           with_relu);
    return make_shared<op::Convert>(signed_add_conv, element::u8);
}
// Build an op::QuantizedDotBias node.
// The scale comes from get_dot_scale using the input's element type and a u8
// (with_relu) or i8 output type; requantize is forwarded to both get_dot_scale
// and the op. A bias that is not already i32 is quantized to i32 first.
shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
                                        const Output<Node>& filters,
                                        const Output<Node>& bias,
                                        const Output<Node>& min_input,
                                        const Output<Node>& max_input,
                                        const Output<Node>& min_filter,
                                        const Output<Node>& max_filter,
                                        const Output<Node>& min_output,
                                        const Output<Node>& max_output,
                                        const bool requantize,
                                        const bool with_relu)
{
    const auto dot_scale = quantization_util::get_dot_scale(min_input,
                                                            max_input,
                                                            min_filter,
                                                            max_filter,
                                                            min_output,
                                                            max_output,
                                                            input.get_element_type(),
                                                            with_relu ? element::u8 : element::i8,
                                                            requantize);

    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 with zero point 0 and no quantization axes.
        const auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        const auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedDotBias>(
        input, filters, bias_i32, dot_scale, requantize, with_relu);
}
} // namespace builder
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
namespace ngraph
{
namespace builder
{
// Build an op::Quantize node whose scale is derived from the [min, max] range.
//
// input      - real-valued tensor to quantize
// min, max   - range bounds; must match input's element type and each other's shape
// type       - quantized element type of the result
// axes       - quantization axes forwarded to op::Quantize
// round_mode - rounding mode forwarded to op::Quantize
//
// Throws ngraph_error on element-type or shape mismatch of min/max.
std::shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
// Build an op::Dequantize node whose scale is derived from the [min, max] range.
//
// input    - quantized tensor to dequantize
// min, max - range bounds; must have element type `type` and equal shapes
// type     - real element type of the result
// axes     - quantization axes forwarded to op::Dequantize
//
// Throws ngraph_error on element-type or shape mismatch of min/max.
std::shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes);
// Concatenate quantized tensors along concatenation_axis, first rescaling each
// argument from its own [min, max] range to a common output range.
//
// args        - quantized inputs
// mins, maxes - per-argument range bounds; must have the same length as args
std::shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxes);
// Build an op::QuantizedConvolutionBias node.
//
// The min_*/max_* arguments give the real-valued ranges of the input, filter
// and output; they are used to compute the requantization scale. A bias whose
// element type is not i32 is quantized to i32 before being passed to the op.
//
// with_relu - forwarded to the op; also selects a u8 (true) or i8 (false)
//             output type for the requantization scale.
std::shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
// Build an op::QuantizedConvolutionRelu node.
//
// The min_*/max_* arguments give the real-valued ranges of the input, filter
// and output; the requantization scale is computed from them for a u8 output.
std::shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
// Build an op::QuantizedConvolutionBiasAdd node.
//
// In addition to the requantization scale (from the input/filter/output
// ranges), a sum scale is computed from the output range and the range of
// sum_input (min_sum_input/max_sum_input). A bias whose element type is not
// i32 is quantized to i32 before being passed to the op.
//
// with_relu - forwarded to the op; also selects a u8 (true) or i8 (false)
//             output type for the requantization scale.
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
// Build an op::QuantizedConvolutionBiasSignedAdd node and wrap it in an
// op::Convert to u8.
//
// Scales are computed as for ScaledQuantizedConvolutionBiasAdd, except that the
// sum scale is multiplied by two when the requantization output type is u8
// (i.e. when with_relu is true). A bias whose element type is not i32 is
// quantized to i32 before being passed to the op.
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
// Build an op::QuantizedDotBias node.
//
// The scale is computed from the input/filter/output ranges, the input's
// element type, and a u8 (with_relu) or i8 output type. A bias whose element
// type is not i32 is quantized to i32 before being passed to the op.
//
// requantize - forwarded to the op; when true the scale also accounts for the
//              output range, otherwise only for the input and filter ranges
// with_relu  - forwarded to the op; selects the u8/i8 output type for the scale
std::shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
} // namespace builder
} // namespace ngraph
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp" #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/axis_set.hpp" #include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp" #include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp" #include "ngraph/op/dequantize.hpp"
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <limits>
#include <vector>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace builder
{
namespace quantization_util
{
// Build the graph for max(|a|, |b|) using op::Abs and op::Maximum.
//
// Marked inline because the definition lives in a header: without it, every
// translation unit including this file would emit its own external definition
// and the program would fail to link.
inline std::shared_ptr<Node> max_abs(Output<Node> a, Output<Node> b)
{
    auto abs_a = std::make_shared<op::Abs>(a);
    auto abs_b = std::make_shared<op::Abs>(b);
    return std::make_shared<op::Maximum>(abs_a, abs_b);
}
// Compute the real-valued (min, max) range represented by an i32 accumulator
// holding products of a u8-quantized value `a` and an i8-quantized value `b`.
//
// min_a/max_a, min_b/max_b - real ranges of the two operands; all four must
//                            share one element type and one shape
// Returns {min_c, max_c}: one combined quantization level scaled by the i32
// minimum and maximum respectively.
// Throws ngraph_error on element-type or shape mismatch.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>
    quantization_range_for_multiplication(Output<Node> min_a,
                                          Output<Node> max_a,
                                          Output<Node> min_b,
                                          Output<Node> max_b)
{
    auto type = min_a.get_element_type();
    if (type != max_a.get_element_type() || type != min_b.get_element_type() ||
        type != max_b.get_element_type())
    {
        throw ngraph_error(
            "quantization_range_for_multiplication: min and max must have same type");
    }

    auto shape = min_a.get_shape();
    if (shape != max_a.get_shape() || shape != min_b.get_shape() ||
        shape != max_b.get_shape())
    {
        throw ngraph_error(
            "quantization_range_for_multiplication: min and max must have same shape");
    }

    // Number of steps between the smallest and largest representable value.
    auto u8_range = make_constant(type,
                                  shape,
                                  std::numeric_limits<uint8_t>::max() -
                                      std::numeric_limits<uint8_t>::min());
    auto i8_range = make_constant(type,
                                  shape,
                                  std::numeric_limits<int8_t>::max() -
                                      std::numeric_limits<int8_t>::min());

    // Real-valued size of one quantization step for a (u8), b (i8) and a * b.
    auto a_one_quant_level = (max_a - min_a) / u8_range;
    auto b_one_quant_level = (max_b - min_b) / i8_range;
    auto c_one_quant_level = a_one_quant_level * b_one_quant_level;

    auto i32_min = make_constant(type, shape, std::numeric_limits<int32_t>::min());
    auto i32_max = make_constant(type, shape, std::numeric_limits<int32_t>::max());
    auto min_c = c_one_quant_level * i32_min;
    auto max_c = c_one_quant_level * i32_max;
    return std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>(min_c, max_c);
}
// Compute the requantization scale that maps the i32 accumulator of a
// quantized convolution back to an 8-bit output.
//
// min_input/max_input, min_filter/max_filter - real ranges of the operands
// min_freezed_output/max_freezed_output      - real range of the 8-bit output
// output_type - element::i8 selects the signed formula; any other type uses
//               the unsigned (u8) formula
// All six range nodes must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::shared_ptr<Node> get_scale(Output<Node> min_input,
                                       Output<Node> max_input,
                                       Output<Node> min_filter,
                                       Output<Node> max_filter,
                                       Output<Node> min_freezed_output,
                                       Output<Node> max_freezed_output,
                                       const ngraph::element::Type& output_type)
{
    auto type = min_input.get_element_type();
    if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
        type != max_filter.get_element_type() ||
        type != min_freezed_output.get_element_type() ||
        type != max_freezed_output.get_element_type())
    {
        throw ngraph_error("get_scale: min and max must have same type");
    }

    auto shape = min_input.get_shape();
    if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
        shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
        shape != max_freezed_output.get_shape())
    {
        throw ngraph_error("get_scale: min and max must have same shape");
    }

    auto ranges = quantization_range_for_multiplication(
        min_input, max_input, min_filter, max_filter);
    auto min_out_value = ranges.first;
    auto max_out_value = ranges.second;

    auto max_abs32 = max_abs(min_out_value, max_out_value);
    auto max_abs8 = max_abs(min_freezed_output, max_freezed_output);

    // The output of int8 convolution is accumulated in int32.
    // Mkldnn needs a scale to requantize the output back to {u}int8 based on
    // if relu is fused or not.
    // Equation to go from f32 to s32. std::pow(2, 31) / max_abs32 can be thought of
    // as the scale used for the quantization.
    // 1. s32 = f32 * std::pow(2, 31) / max_abs32;
    // Equation to go from f32 to u8.
    // 2. u8 = f32 * std::pow(2, 8) / max_abs8;
    // Equation to go from f32 to s8.
    // 3. s8 = f32 * std::pow(2, 7) / max_abs8;
    // Replacing f32 from eq 1 in eq 2.
    // 4. u8 = s32 * std::pow(2, -23) * max_abs32 / max_abs8;
    // Replacing f32 from eq 1 in eq 3.
    // 5. s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
    return make_constant(
               type, shape, std::pow(2, (output_type == element::i8) ? -24 : -23)) *
           (max_abs32 / max_abs8);
}
// Compute the scale handed to op::Quantize when quantizing a bias to i32:
// max|input range| * max|filter range| / (u8 max * i8 max).
//
// All four range nodes must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                            Output<Node> max_input,
                                            Output<Node> min_filter,
                                            Output<Node> max_filter)
{
    auto type = min_input.get_element_type();
    if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
        type != max_filter.get_element_type())
    {
        throw ngraph_error("get_bias_scale: min and max must have same type");
    }

    auto shape = min_input.get_shape();
    if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
        shape != max_filter.get_shape())
    {
        throw ngraph_error("get_bias_scale: min and max must have same shape");
    }

    auto max_abs_input_range = max_abs(min_input, max_input);
    auto max_abs_filter_range = max_abs(min_filter, max_filter);
    // Largest product magnitude of a u8 value and an i8 value (255 * 127).
    auto range = make_constant(type,
                               shape,
                               std::numeric_limits<uint8_t>::max() *
                                   std::numeric_limits<int8_t>::max());

    // Inverting the scale calculation here as the Quantize op passes scale as 1/scale.
    return (max_abs_input_range * max_abs_filter_range) / range;
}
// Compute the ratio max|range of conv 2| / max|range of conv 1|, used as the
// sum scale by the quantized convolution + add builders.
//
// All four range nodes must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                           Output<Node> max_freezed_output_conv_1,
                                           Output<Node> min_freezed_output_conv_2,
                                           Output<Node> max_freezed_output_conv_2)
{
    auto type = min_freezed_output_conv_1.get_element_type();
    if (type != max_freezed_output_conv_1.get_element_type() ||
        type != min_freezed_output_conv_2.get_element_type() ||
        type != max_freezed_output_conv_2.get_element_type())
    {
        throw ngraph_error("get_sum_scale: min and max must have same type");
    }

    auto shape = min_freezed_output_conv_1.get_shape();
    if (shape != max_freezed_output_conv_1.get_shape() ||
        shape != min_freezed_output_conv_2.get_shape() ||
        shape != max_freezed_output_conv_2.get_shape())
    {
        throw ngraph_error("get_sum_scale: min and max must have same shape");
    }

    auto max_abs_conv_1 = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
    auto max_abs_conv_2 = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
    return max_abs_conv_2 / max_abs_conv_1;
}
// Compute a quantization scale for one [min, max] range:
// max(|min|, |max|) / (largest positive value of quant_type).
//
// input_min_range/input_max_range - range bounds; must share element type and shape
// quant_type  - quantized type; its bitwidth and signedness give the target range
//               (2^(bw-1) - 1 when signed, 2^bw - 1 when unsigned)
// bump_by_eps - when true the range is first adjusted: min is clamped to be
//               <= 0, and max is raised to at least min + epsilon and at
//               least 0, where epsilon = max(1, max(|min|, |max|)) / 100
// Throws ngraph_error on element-type or shape mismatch.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::shared_ptr<Node> get_scale(Output<Node> input_min_range,
                                       Output<Node> input_max_range,
                                       const ngraph::element::Type& quant_type,
                                       bool bump_by_eps = false)
{
    auto type = input_min_range.get_element_type();
    if (type != input_max_range.get_element_type())
    {
        throw ngraph_error("get_scale: min and max must have same type");
    }

    auto shape = input_min_range.get_shape();
    if (shape != input_max_range.get_shape())
    {
        throw ngraph_error("get_scale: min and max must have same shape");
    }

    auto min_range = input_min_range;
    auto max_range = input_max_range;

    if (bump_by_eps)
    {
        auto zero = make_constant(type, shape, 0);
        min_range = std::make_shared<op::Minimum>(zero, input_min_range);

        auto max_abs_input_range = max_abs(input_min_range, input_max_range);

        auto one = make_constant(type, shape, 1);
        auto hundred = make_constant(type, shape, 100);
        auto epsilon =
            std::make_shared<op::Maximum>(one, max_abs_input_range) / hundred;

        max_range = std::make_shared<op::Maximum>(input_max_range, min_range + epsilon);
        max_range = std::make_shared<op::Maximum>(zero, max_range);
    }

    size_t bw = quant_type.bitwidth();
    float range = static_cast<float>(
        (quant_type.is_signed() ? std::pow(2, (bw - 1)) : std::pow(2, bw)) - 1);

    auto max_abs_range = max_abs(min_range, max_range);
    auto target_range = make_constant(type, shape, range);

    return max_abs_range / target_range;
}
// Validate the arguments of a quantized concat.
//
// Requires args, mins and maxs to have the same length, and every min/max pair
// to share an element type and to both have shape Shape{1}.
// Throws ngraph_error describing the first violation found.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline void
    check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
{
    const auto size = args.size();
    if (size != mins.size() || size != maxs.size())
    {
        throw ngraph_error("Min and Max node vectors must be of same length");
    }
    for (size_t i = 0; i < size; i++)
    {
        // Bind by reference: the nodes are only inspected, so there is no need
        // to copy the shared_ptr on every iteration.
        const auto& min = mins[i];
        const auto& max = maxs[i];
        auto type = min->get_element_type();
        if (type != max->get_element_type())
        {
            throw ngraph_error("check_concat: min and max must have same type");
        }

        if (min->get_shape() != Shape{1} || max->get_shape() != Shape{1})
        {
            // Separate the two shapes so the message stays readable.
            throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
                               vector_to_string(min->get_shape()) + " and " +
                               vector_to_string(max->get_shape()));
        }
    }
}
// Compute the scale for a quantized dot product.
//
// data_scale and weight_scale come from the input and filter ranges (the
// filter is always treated as i8). When requantize is true the result is
// data_scale * weight_scale / out_scale, otherwise data_scale * weight_scale.
//
// All six range nodes must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Marked inline because the definition lives in a header (avoids multiple
// definitions when included from more than one translation unit).
inline std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                           Output<Node> max_input,
                                           Output<Node> min_filter,
                                           Output<Node> max_filter,
                                           Output<Node> min_freezed_output,
                                           Output<Node> max_freezed_output,
                                           const ngraph::element::Type& input_type,
                                           const ngraph::element::Type& output_type,
                                           const bool requantize = true)
{
    auto type = min_input.get_element_type();
    if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
        type != max_filter.get_element_type() ||
        type != min_freezed_output.get_element_type() ||
        type != max_freezed_output.get_element_type())
    {
        throw ngraph_error("get_dot_scale: min and max must have same type");
    }

    auto shape = min_input.get_shape();
    if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
        shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
        shape != max_freezed_output.get_shape())
    {
        throw ngraph_error("get_dot_scale: min and max must have same shape");
    }

    auto data_scale = get_scale(min_input, max_input, input_type);
    auto weight_scale = get_scale(min_filter, max_filter, element::i8);
    auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);
    if (requantize)
    {
        return data_scale * weight_scale / out_scale;
    }
    else
    {
        return data_scale * weight_scale;
    }
}
} // namespace quantization_util
} // namespace builder
} // namespace ngraph
...@@ -74,6 +74,128 @@ namespace ngraph ...@@ -74,6 +74,128 @@ namespace ngraph
return max_abs_range / target_range; return max_abs_range / target_range;
} }
// Compute the scale handed to op::Quantize when quantizing a bias to i32:
// max|input range| * max|filter range| / (u8 max * i8 max).
// Throws ngraph_error unless all four range nodes share element type and shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                     Output<Node> max_input,
                                     Output<Node> min_filter,
                                     Output<Node> max_filter)
{
    // The remaining bounds are compared against min_input, in argument order.
    const Output<Node>* const other_bounds[] = {&max_input, &min_filter, &max_filter};

    const auto expected_type = min_input.get_element_type();
    for (const Output<Node>* bound : other_bounds)
    {
        if (expected_type != bound->get_element_type())
        {
            throw ngraph_error("get_bias_scale: min and max must have same type");
        }
    }

    const auto expected_shape = min_input.get_shape();
    for (const Output<Node>* bound : other_bounds)
    {
        if (expected_shape != bound->get_shape())
        {
            throw ngraph_error("get_bias_scale: min and max must have same shape");
        }
    }

    auto input_extent = max_abs(min_input, max_input);
    auto filter_extent = max_abs(min_filter, max_filter);
    // Largest product magnitude of a u8 value and an i8 value.
    auto quant_levels = make_constant(expected_type,
                                      expected_shape,
                                      std::numeric_limits<uint8_t>::max() *
                                          std::numeric_limits<int8_t>::max());

    // Inverting the scale calculation here as the Quantize op passes scale as 1/scale.
    auto extent_product = input_extent * filter_extent;
    return extent_product / quant_levels;
}
// Compute the ratio max|range of conv 2| / max|range of conv 1|.
// Throws ngraph_error unless all four range nodes share element type and shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                    Output<Node> max_freezed_output_conv_1,
                                    Output<Node> min_freezed_output_conv_2,
                                    Output<Node> max_freezed_output_conv_2)
{
    // The remaining bounds are compared against the first one, in argument order.
    const Output<Node>* const other_bounds[] = {&max_freezed_output_conv_1,
                                                &min_freezed_output_conv_2,
                                                &max_freezed_output_conv_2};

    const auto expected_type = min_freezed_output_conv_1.get_element_type();
    for (const Output<Node>* bound : other_bounds)
    {
        if (expected_type != bound->get_element_type())
        {
            throw ngraph_error("get_sum_scale: min and max must have same type");
        }
    }

    const auto expected_shape = min_freezed_output_conv_1.get_shape();
    for (const Output<Node>* bound : other_bounds)
    {
        if (expected_shape != bound->get_shape())
        {
            throw ngraph_error("get_sum_scale: min and max must have same shape");
        }
    }

    auto first_extent = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
    auto second_extent = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
    return second_extent / first_extent;
}
/// \brief Builds the node holding the scale for a quantized dot product.
///
/// Three scales are derived with get_scale: one for the input (using
/// input_type), one for the filter (always using element::i8) and one for the
/// output (using output_type).
///
/// \param min_input          Lower bound of the input range.
/// \param max_input          Upper bound of the input range.
/// \param min_filter         Lower bound of the filter range.
/// \param max_filter         Upper bound of the filter range.
/// \param min_freezed_output Lower bound of the output range.
/// \param max_freezed_output Upper bound of the output range.
/// \param input_type         Quantized type used for the input scale.
/// \param output_type        Quantized type used for the output scale.
/// \param requantize         When true the product of the input and filter
///                           scales is divided by the output scale; when false
///                           the product is returned as is.
/// \return Node computing the combined scale.
/// \throws ngraph_error if the six range values do not all share one element
///         type and one shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                    Output<Node> max_input,
                                    Output<Node> min_filter,
                                    Output<Node> max_filter,
                                    Output<Node> min_freezed_output,
                                    Output<Node> max_freezed_output,
                                    const ngraph::element::Type& input_type,
                                    const ngraph::element::Type& output_type,
                                    const bool requantize)
{
    const auto range_type = min_input.get_element_type();
    const bool types_match = range_type == max_input.get_element_type() &&
                             range_type == min_filter.get_element_type() &&
                             range_type == max_filter.get_element_type() &&
                             range_type == min_freezed_output.get_element_type() &&
                             range_type == max_freezed_output.get_element_type();
    if (!types_match)
    {
        throw ngraph_error("get_dot_scale: min and max must have same type");
    }

    const auto range_shape = min_input.get_shape();
    const bool shapes_match = range_shape == max_input.get_shape() &&
                              range_shape == min_filter.get_shape() &&
                              range_shape == max_filter.get_shape() &&
                              range_shape == min_freezed_output.get_shape() &&
                              range_shape == max_freezed_output.get_shape();
    if (!shapes_match)
    {
        throw ngraph_error("get_dot_scale: min and max must have same shape");
    }

    // All three scales are built up front, whether or not the output scale
    // ends up in the returned expression.
    auto input_scale = get_scale(min_input, max_input, input_type);
    auto filter_scale = get_scale(min_filter, max_filter, element::i8);
    auto output_scale = get_scale(min_freezed_output, max_freezed_output, output_type);

    if (!requantize)
    {
        return input_scale * filter_scale;
    }
    return input_scale * filter_scale / output_scale;
}
/// \brief Validates the min/max vectors passed alongside the arguments of a
///        quantized concat.
///
/// Only the length of \p args is inspected; its elements are not touched.
///
/// \param args Nodes to be concatenated.
/// \param mins Per-argument minimum nodes.
/// \param maxs Per-argument maximum nodes.
/// \throws ngraph_error if the three vectors differ in length, if any min/max
///         pair differs in element type, or if any min or max does not have
///         shape Shape{1}.
void check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
{
    const auto size = args.size();
    if (size != mins.size() || size != maxs.size())
    {
        throw ngraph_error("Min and Max node vectors must be of same length");
    }
    for (size_t i = 0; i < size; i++)
    {
        // Bind by reference: the nodes are only inspected, so there is no need
        // to take an extra shared_ptr reference per iteration.
        const auto& min = mins[i];
        const auto& max = maxs[i];
        if (min->get_element_type() != max->get_element_type())
        {
            throw ngraph_error("check_concat: min and max must have same type");
        }
        if (min->get_shape() != Shape{1} || max->get_shape() != Shape{1})
        {
            // Separate the two shapes so the offending one can be told apart.
            throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
                               vector_to_string(min->get_shape()) + ", " +
                               vector_to_string(max->get_shape()));
        }
    }
}
} }
} }
} }
...@@ -43,6 +43,30 @@ namespace ngraph ...@@ -43,6 +43,30 @@ namespace ngraph
const Output<Node>& input_max_range, const Output<Node>& input_max_range,
const ngraph::element::Type& quant_type, const ngraph::element::Type& quant_type,
bool bump_by_eps = false); bool bump_by_eps = false);
/// \brief Builds the node holding the scale used to quantize a bias.
///
/// The scale is max_abs(min_input, max_input) * max_abs(min_filter, max_filter)
/// divided by the product of the uint8_t and int8_t maxima.
/// \throws ngraph_error if the four arguments differ in element type or shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter);
/// \brief Builds the node holding the ratio of two output ranges:
///        max_abs of the conv_2 range divided by max_abs of the conv_1 range.
/// \throws ngraph_error if the four arguments differ in element type or shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
Output<Node> max_freezed_output_conv_1,
Output<Node> min_freezed_output_conv_2,
Output<Node> max_freezed_output_conv_2);
/// \brief Builds the node holding the scale for a quantized dot product.
///
/// The input scale uses \p input_type, the filter scale always uses
/// element::i8 and the output scale uses \p output_type. With \p requantize
/// set the result is input_scale * filter_scale / output_scale; otherwise it
/// is input_scale * filter_scale.
/// \throws ngraph_error if the six range arguments differ in element type or
///         shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& input_type,
const ngraph::element::Type& output_type,
const bool requantize = true);
/// \brief Validates the min/max vectors accompanying the arguments of a
///        quantized concat.
/// \throws ngraph_error if the three vectors differ in length, if a min/max
///         pair differs in element type, or if a min or max is not Shape{1}.
void check_concat(const NodeVector& args,
const NodeVector& mins,
const NodeVector& maxs);
} }
} }
} }
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        /// \brief Creates a Quantize op whose scale is derived from a real-valued range.
        ///
        /// The zero point is a constant 0 of \p quant_type with the shape of
        /// \p min; the scale comes from quantization_utils::get_scale with its
        /// fourth argument set to true.
        ///
        /// \param input      Value to quantize.
        /// \param min        Lower bound of the range; must have the element type of \p input.
        /// \param max        Upper bound of the range; must have the element type of \p input
        ///                   and the shape of \p min.
        /// \param quant_type Quantized element type to produce.
        /// \param axes       Axes forwarded to the Quantize op.
        /// \param round_mode Rounding mode forwarded to the Quantize op.
        /// \throws ngraph_error if the type or shape requirements above are not met.
        shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
                                         const Output<Node>& min,
                                         const Output<Node>& max,
                                         const ngraph::element::Type& quant_type,
                                         const ngraph::AxisSet& axes,
                                         op::Quantize::RoundMode round_mode)
        {
            // Small local helper so every precondition reads as a single line.
            const auto require = [](bool holds, const char* message) {
                if (!holds)
                {
                    throw ngraph_error(message);
                }
            };

            const auto input_type = input.get_element_type();
            require(min.get_element_type() == input_type,
                    "QuantizeBuilder: min must match input type");
            require(max.get_element_type() == input_type,
                    "QuantizeBuilder: max must match input type");

            const auto range_shape = min.get_shape();
            require(range_shape == max.get_shape(),
                    "QuantizeBuilder: min and max must have same shape");

            auto zero_point = make_constant(quant_type, range_shape, 0);
            auto quant_scale = quantization_utils::get_scale(min, max, quant_type, true);
            return make_shared<op::Quantize>(
                input, quant_scale, zero_point, quant_type, axes, round_mode);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Creates a Quantize op for \p input, deriving its scale from the
///        range [\p min, \p max] and using a zero point of 0.
/// \param input      Value to quantize.
/// \param min        Lower bound of the range; same element type as \p input.
/// \param max        Upper bound of the range; same element type as \p input
///                   and same shape as \p min.
/// \param quant_type Quantized element type to produce.
/// \param axes       Axes passed to the Quantize op.
/// \param round_mode Rounding mode passed to the Quantize op.
/// \throws ngraph_error if the type or shape requirements are not met.
std::shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantized_concat_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        /// \brief Concatenates quantized tensors that carry different ranges.
        ///
        /// A shared output scale is derived from the minimum over \p mins and the
        /// maximum over \p maxs. Each argument is dequantized to f32 with its own
        /// scale, quantized again with the shared scale (round to nearest, ties to
        /// even) and the results are concatenated.
        ///
        /// \param args               Quantized nodes to concatenate; must not be empty.
        /// \param concatenation_axis Axis along which the results are concatenated.
        /// \param mins               Per-argument range minimum, each of Shape{1}.
        /// \param maxs               Per-argument range maximum, each of Shape{1}.
        /// \return The Concat node over the rescaled arguments.
        /// \throws ngraph_error if \p args is empty or if
        ///         quantization_utils::check_concat rejects the inputs.
        shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
                                                size_t concatenation_axis,
                                                const NodeVector& mins,
                                                const NodeVector& maxs)
        {
            quantization_utils::check_concat(args, mins, maxs);

            // check_concat accepts three empty vectors, which would make the
            // args[0] access below undefined behaviour. Checked after
            // check_concat so existing length-mismatch errors are unchanged.
            if (args.empty())
            {
                throw ngraph_error("QuantizedConcatBuilder: args must not be empty");
            }
            auto quant_type = args[0]->get_element_type();

            // output scale
            auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
            auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
            auto out_scale = quantization_utils::get_scale(min, max, quant_type);

            NodeVector rescaled_args(args.size());
            for (size_t i = 0; i < args.size(); ++i)
            {
                auto q_type = args[i]->get_element_type();
                // The per-argument scale is reshaped to a scalar (Shape{}).
                auto in_scale = make_shared<ngraph::op::Reshape>(
                    quantization_utils::get_scale(mins[i], maxs[i], q_type),
                    AxisVector{0},
                    Shape{});
                auto zero = make_constant(q_type, in_scale->get_shape(), 0);

                // Back to real values with the argument's own scale ...
                rescaled_args[i] =
                    make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
                // ... then quantize again with the shared output scale.
                rescaled_args[i] =
                    make_shared<op::Quantize>(rescaled_args[i],
                                              out_scale,
                                              zero,
                                              q_type,
                                              AxisSet{},
                                              op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
            }

            return make_shared<op::Concat>(rescaled_args, concatenation_axis);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Concatenates quantized nodes that carry different ranges.
///
/// Each argument is dequantized with its own scale and quantized again with a
/// shared output scale before the concatenation.
/// \param args               Quantized nodes to concatenate.
/// \param concatenation_axis Axis along which to concatenate.
/// \param mins               Per-argument range minimum (Shape{1} each).
/// \param maxs               Per-argument range maximum (Shape{1} each).
/// \throws ngraph_error if quantization_utils::check_concat rejects the inputs.
std::shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxs);
}
}
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <memory> #include <memory>
#include "ngraph/builder/quantized_conv_builder.hpp" #include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/op/constant.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -74,5 +73,209 @@ namespace ngraph ...@@ -74,5 +73,209 @@ namespace ngraph
filter_axes, filter_axes,
output_axes); output_axes);
} }
/// \brief Creates a QuantizedConvolutionBias op from real-valued ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale,
/// where the output scale is computed for u8 when \p with_relu is set and for
/// i8 otherwise. A bias that is not already i32 is quantized to i32 first.
///
/// \param input, filters, bias        Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides  Forwarded unchanged to the op.
/// \param min_input, max_input        Range of the input.
/// \param min_filter, max_filter      Range of the filter.
/// \param min_output, max_output      Range of the output.
/// \param with_relu                   Forwarded to the op; also selects the
///                                    output type used for the output scale.
/// \return The QuantizedConvolutionBias node.
shared_ptr<Node> QuantizedConvolutionBiasBuilder(const Output<Node>& input,
                                                 const Output<Node>& filters,
                                                 const Output<Node>& bias,
                                                 const Strides& window_movement_strides,
                                                 const Strides& window_dilation_strides,
                                                 const CoordinateDiff& padding_below,
                                                 const CoordinateDiff& padding_above,
                                                 const Strides& data_dilation_strides,
                                                 const Output<Node>& min_input,
                                                 const Output<Node>& max_input,
                                                 const Output<Node>& min_filter,
                                                 const Output<Node>& max_filter,
                                                 const Output<Node>& min_output,
                                                 const Output<Node>& max_output,
                                                 const bool with_relu)
{
    auto result_type = with_relu ? element::u8 : element::i8;

    auto data_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto weights_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto result_scale = quantization_utils::get_scale(min_output, max_output, result_type);
    auto requantization_scale = data_scale * weights_scale / result_scale;

    // Use the bias as given when it is already i32; otherwise quantize it to
    // i32 with a zero offset.
    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBias>(input,
                                                     filters,
                                                     bias_i32,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requantization_scale,
                                                     with_relu);
}
/// \brief Creates a QuantizedConvolutionRelu op from real-valued ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale,
/// with the output scale always computed for element::u8.
///
/// \param input, filters              Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides  Forwarded unchanged to the op.
/// \param min_input, max_input        Range of the input.
/// \param min_filter, max_filter      Range of the filter.
/// \param min_output, max_output      Range of the output.
/// \return The QuantizedConvolutionRelu node.
shared_ptr<Node> QuantizedConvolutionReluBuilder(const Output<Node>& input,
                                                 const Output<Node>& filters,
                                                 const Strides& window_movement_strides,
                                                 const Strides& window_dilation_strides,
                                                 const CoordinateDiff& padding_below,
                                                 const CoordinateDiff& padding_above,
                                                 const Strides& data_dilation_strides,
                                                 const Output<Node>& min_input,
                                                 const Output<Node>& max_input,
                                                 const Output<Node>& min_filter,
                                                 const Output<Node>& max_filter,
                                                 const Output<Node>& min_output,
                                                 const Output<Node>& max_output)
{
    auto data_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto weights_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto result_scale = quantization_utils::get_scale(min_output, max_output, element::u8);

    auto requantization_scale = data_scale * weights_scale / result_scale;

    return make_shared<op::QuantizedConvolutionRelu>(input,
                                                     filters,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requantization_scale);
}
/// \brief Creates a QuantizedConvolutionBiasAdd op from real-valued ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale,
/// where the output scale is computed for u8 when \p with_relu is set and for
/// i8 otherwise. The sum scale comes from get_sum_scale applied to the output
/// range and the sum-input range. A bias that is not already i32 is quantized
/// to i32 first.
///
/// \param input, filters, bias, sum_input  Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides  Forwarded unchanged to the op.
/// \param min_input, max_input         Range of the input.
/// \param min_filter, max_filter       Range of the filter.
/// \param min_output, max_output       Range of the output.
/// \param min_sum_input, max_sum_input Range of the summed input.
/// \param with_relu                    Forwarded to the op; also selects the
///                                     output type used for the output scale.
/// \return The QuantizedConvolutionBiasAdd node.
shared_ptr<Node> QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
                                                    const Output<Node>& filters,
                                                    const Output<Node>& bias,
                                                    const Output<Node>& sum_input,
                                                    const Strides& window_movement_strides,
                                                    const Strides& window_dilation_strides,
                                                    const CoordinateDiff& padding_below,
                                                    const CoordinateDiff& padding_above,
                                                    const Strides& data_dilation_strides,
                                                    const Output<Node>& min_input,
                                                    const Output<Node>& max_input,
                                                    const Output<Node>& min_filter,
                                                    const Output<Node>& max_filter,
                                                    const Output<Node>& min_output,
                                                    const Output<Node>& max_output,
                                                    const Output<Node>& min_sum_input,
                                                    const Output<Node>& max_sum_input,
                                                    const bool with_relu)
{
    auto result_type = with_relu ? element::u8 : element::i8;

    auto data_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto weights_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto result_scale = quantization_utils::get_scale(min_output, max_output, result_type);
    auto requantization_scale = data_scale * weights_scale / result_scale;

    auto sum_scale = builder::quantization_utils::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);

    // Use the bias as given when it is already i32; otherwise quantize it to
    // i32 with a zero offset.
    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBiasAdd>(input,
                                                        filters,
                                                        bias_i32,
                                                        sum_input,
                                                        window_movement_strides,
                                                        window_dilation_strides,
                                                        padding_below,
                                                        padding_above,
                                                        data_dilation_strides,
                                                        requantization_scale,
                                                        sum_scale,
                                                        with_relu);
}
/// \brief Creates a QuantizedConvolutionBiasSignedAdd op from real-valued
///        ranges and converts its result to u8.
///
/// The requantization scale is input_scale * filter_scale / output_scale,
/// where the output scale is computed for u8 when \p with_relu is set and for
/// i8 otherwise. The sum scale comes from get_sum_scale and is doubled when
/// the output type is u8. A bias that is not already i32 is quantized to i32
/// first.
///
/// \param input, filters, bias, sum_input  Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides  Forwarded unchanged to the op.
/// \param min_input, max_input         Range of the input.
/// \param min_filter, max_filter       Range of the filter.
/// \param min_output, max_output       Range of the output.
/// \param min_sum_input, max_sum_input Range of the summed input.
/// \param with_relu                    Forwarded to the op; also selects the
///                                     output type used for the output scale.
/// \return A Convert node (to element::u8) wrapping the fused op.
shared_ptr<Node>
    QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
                                             const Output<Node>& filters,
                                             const Output<Node>& bias,
                                             const Output<Node>& sum_input,
                                             const Strides& window_movement_strides,
                                             const Strides& window_dilation_strides,
                                             const CoordinateDiff& padding_below,
                                             const CoordinateDiff& padding_above,
                                             const Strides& data_dilation_strides,
                                             const Output<Node>& min_input,
                                             const Output<Node>& max_input,
                                             const Output<Node>& min_filter,
                                             const Output<Node>& max_filter,
                                             const Output<Node>& min_output,
                                             const Output<Node>& max_output,
                                             const Output<Node>& min_sum_input,
                                             const Output<Node>& max_sum_input,
                                             const bool with_relu)
{
    auto output_et = with_relu ? element::u8 : element::i8;
    auto input_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto filter_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto output_scale = quantization_utils::get_scale(min_output, max_output, output_et);
    auto requantization_scale = input_scale * filter_scale / output_scale;

    auto sum_scale = builder::quantization_utils::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);
    if (output_et == element::u8)
    {
        // Need to multiply by two to account for u8 requantization_scale.
        // The constant takes sum_scale's own element type (derived from the
        // min/max inputs) so the multiplication operands always agree; for
        // f32 ranges this is the same constant as before.
        auto two =
            make_constant(sum_scale->get_element_type(), sum_scale->get_shape(), 2.0f);
        sum_scale = two * sum_scale;
    }

    // Use the bias as given when it is already i32; otherwise quantize it to
    // i32 with a zero offset.
    auto mybias = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero = make_constant(element::i32, min_input.get_shape(), 0);
        AxisSet quantization_axes;
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        op::Quantize::RoundMode round_mode =
            op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
        mybias = make_shared<op::Quantize>(
            bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
    }

    auto qconv = make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
                                                                    filters,
                                                                    mybias,
                                                                    sum_input,
                                                                    window_movement_strides,
                                                                    window_dilation_strides,
                                                                    padding_below,
                                                                    padding_above,
                                                                    data_dilation_strides,
                                                                    requantization_scale,
                                                                    sum_scale,
                                                                    with_relu);
    // The fused op's result is always converted to u8 before being returned.
    return make_shared<op::Convert>(qconv, element::u8);
}
} }
} }
...@@ -18,6 +18,11 @@ ...@@ -18,6 +18,11 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_convolution.hpp" #include "ngraph/op/quantized_convolution.hpp"
#include "quantization_utils.hpp" #include "quantization_utils.hpp"
...@@ -43,5 +48,77 @@ namespace ngraph ...@@ -43,5 +48,77 @@ namespace ngraph
const ngraph::AxisSet& input_axes = ngraph::AxisSet{}, const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{}, const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
const ngraph::AxisSet& output_axes = ngraph::AxisSet{}); const ngraph::AxisSet& output_axes = ngraph::AxisSet{});
/// \brief Creates a QuantizedConvolutionBias op, deriving its requantization
///        scale from the input, filter and output ranges.
///
/// The output scale is computed for u8 when \p with_relu is true and for i8
/// otherwise. A \p bias that is not i32 is quantized to i32 first.
std::shared_ptr<Node>
QuantizedConvolutionBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
/// \brief Creates a QuantizedConvolutionRelu op, deriving its requantization
///        scale from the input, filter and output ranges.
///
/// The output scale is always computed for element::u8.
std::shared_ptr<Node>
QuantizedConvolutionReluBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
/// \brief Creates a QuantizedConvolutionBiasAdd op, deriving its
///        requantization scale from the input, filter and output ranges and
///        its sum scale from the output and sum-input ranges.
///
/// The output scale is computed for u8 when \p with_relu is true and for i8
/// otherwise. A \p bias that is not i32 is quantized to i32 first.
std::shared_ptr<Node>
QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
/// \brief Creates a QuantizedConvolutionBiasSignedAdd op and wraps it in a
///        Convert to element::u8.
///
/// The requantization scale is derived from the input, filter and output
/// ranges; the sum scale from the output and sum-input ranges, doubled when
/// \p with_relu is true. A \p bias that is not i32 is quantized to i32 first.
std::shared_ptr<Node>
QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
} }
} }
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <memory> #include <memory>
#include "ngraph/builder/quantized_dot_builder.hpp" #include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/op/constant.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -65,5 +64,45 @@ namespace ngraph ...@@ -65,5 +64,45 @@ namespace ngraph
input1_axes, input1_axes,
output_axes); output_axes);
} }
/// \brief Creates a QuantizedDotBias op from real-valued ranges.
///
/// The scale comes from quantization_utils::get_dot_scale, called with the
/// input's element type and with u8 as output type when \p with_relu is set
/// (i8 otherwise). A bias that is not already i32 is quantized to i32 first.
///
/// \param input, filters, bias    Operands of the dot product.
/// \param min_input, max_input    Range of the input.
/// \param min_filter, max_filter  Range of the filter.
/// \param min_output, max_output  Range of the output.
/// \param requantize              Forwarded to get_dot_scale and to the op.
/// \param with_relu               Forwarded to the op; also selects the
///                                output type passed to get_dot_scale.
/// \return The QuantizedDotBias node.
shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
                                         const Output<Node>& filters,
                                         const Output<Node>& bias,
                                         const Output<Node>& min_input,
                                         const Output<Node>& max_input,
                                         const Output<Node>& min_filter,
                                         const Output<Node>& max_filter,
                                         const Output<Node>& min_output,
                                         const Output<Node>& max_output,
                                         const bool requantize,
                                         const bool with_relu)
{
    auto result_type = with_relu ? element::u8 : element::i8;
    auto requantization_scale = quantization_utils::get_dot_scale(min_input,
                                                                  max_input,
                                                                  min_filter,
                                                                  max_filter,
                                                                  min_output,
                                                                  max_output,
                                                                  input.get_element_type(),
                                                                  result_type,
                                                                  requantize);

    // Use the bias as given when it is already i32; otherwise quantize it to
    // i32 with a zero offset.
    Output<Node> bias_i32 = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        bias_i32 = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedDotBias>(
        input, filters, bias_i32, requantization_scale, requantize, with_relu);
}
} }
} }
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_dot.hpp" #include "ngraph/op/quantized_dot.hpp"
#include "quantization_utils.hpp" #include "quantization_utils.hpp"
...@@ -38,5 +41,17 @@ namespace ngraph ...@@ -38,5 +41,17 @@ namespace ngraph
const ngraph::AxisSet& input0_axes, const ngraph::AxisSet& input0_axes,
const ngraph::AxisSet& input1_axes, const ngraph::AxisSet& input1_axes,
const ngraph::AxisSet& output_axes); const ngraph::AxisSet& output_axes);
/// \brief Creates a QuantizedDotBias op, deriving its scale from the input,
///        filter and output ranges via quantization_utils::get_dot_scale.
///
/// The output type passed to get_dot_scale is u8 when \p with_relu is true
/// and i8 otherwise. A \p bias that is not i32 is quantized to i32 first.
/// \p requantize and \p with_relu are also forwarded to the op.
std::shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
} }
} }
...@@ -62,8 +62,12 @@ namespace ngraph ...@@ -62,8 +62,12 @@ namespace ngraph
/// recipes, for example auto-broadcast. /// recipes, for example auto-broadcast.
#include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/numpy_transpose.hpp" #include "ngraph/builder/numpy_transpose.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp" #include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/builder/reduce_ops.hpp" #include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/builder/reshape.hpp" #include "ngraph/builder/reshape.hpp"
#include "ngraph/builder/tensor_mask.hpp" #include "ngraph/builder/tensor_mask.hpp"
......
...@@ -21,8 +21,10 @@ ...@@ -21,8 +21,10 @@
#include <string> #include <string>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "ngraph/builder/quantization.hpp" #include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp" #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp" #include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp" #include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
...@@ -61,19 +63,19 @@ TEST(builder, scaled_QC_with_relu) ...@@ -61,19 +63,19 @@ TEST(builder, scaled_QC_with_relu)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {20.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionRelu(A, auto CV = ngraph::builder::QuantizedConvolutionReluBuilder(A,
B, B,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H); H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -103,19 +105,19 @@ TEST(builder, dynamic_scaled_QC_with_relu) ...@@ -103,19 +105,19 @@ TEST(builder, dynamic_scaled_QC_with_relu)
auto F = make_shared<op::Parameter>(element::f32, Shape{1}); auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1}); auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1}); auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionRelu(A, auto CV = ngraph::builder::QuantizedConvolutionReluBuilder(A,
B, B,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H); H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, D, E, F, G, H}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, D, E, F, G, H});
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output // Create some tensors for input/output
...@@ -158,20 +160,20 @@ TEST(builder, scaled_QC_with_bias) ...@@ -158,20 +160,20 @@ TEST(builder, scaled_QC_with_bias)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {22.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {22.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {90.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {90.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B, B,
Bias, Bias,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H); H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -185,7 +187,7 @@ TEST(builder, scaled_QC_with_bias) ...@@ -185,7 +187,7 @@ TEST(builder, scaled_QC_with_bias)
auto result = backend->create_tensor(element::i8, shape_r); auto result = backend->create_tensor(element::i8, shape_r);
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c}); handle->call_with_validate({result}, {a, b, c});
EXPECT_EQ((vector<int8_t>{38, 55, 50, 52, 61, 109, 127, 68, 54, 81, 68, 62}), EXPECT_EQ((vector<int8_t>{38, 55, 49, 52, 61, 109, 127, 68, 54, 80, 68, 62}),
read_vector<int8_t>(result)); read_vector<int8_t>(result));
} }
...@@ -206,20 +208,20 @@ TEST(builder, dynamic_scaled_QC_with_bias) ...@@ -206,20 +208,20 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto F = make_shared<op::Parameter>(element::f32, Shape{1}); auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1}); auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1}); auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B, B,
Bias, Bias,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H); H);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H});
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output // Create some tensors for input/output
...@@ -244,7 +246,7 @@ TEST(builder, dynamic_scaled_QC_with_bias) ...@@ -244,7 +246,7 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto result = backend->create_tensor(element::i8, shape_r); auto result = backend->create_tensor(element::i8, shape_r);
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i}); handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i});
EXPECT_EQ((vector<int8_t>{38, 55, 50, 52, 61, 109, 127, 68, 54, 81, 68, 62}), EXPECT_EQ((vector<int8_t>{38, 55, 49, 52, 61, 109, 127, 68, 54, 80, 68, 62}),
read_vector<int8_t>(result)); read_vector<int8_t>(result));
} }
...@@ -265,21 +267,21 @@ TEST(builder, scaled_QC_with_bias_and_relu) ...@@ -265,21 +267,21 @@ TEST(builder, scaled_QC_with_bias_and_relu)
auto F = op::Constant::create(element::f32, Shape{1}, {127.0f}); auto F = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{1}, {20.0f}); auto G = op::Constant::create(element::f32, Shape{1}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f}); auto H = op::Constant::create(element::f32, Shape{1}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B, B,
Bias, Bias,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H, H,
true); true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -317,24 +319,25 @@ TEST(builder, scaled_QC_with_bias_add_and_relu) ...@@ -317,24 +319,25 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f}); auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f}); auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {180.0f}); auto J = op::Constant::create(element::f32, Shape{}, {180.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A, auto CV =
B, ngraph::builder::QuantizedConvolutionBiasAddBuilder(A,
Bias, B,
Add, Bias,
Strides{1, 1}, // move_strides Add,
Strides{1, 1}, // filter_dilation Strides{1, 1}, // move_strides
CoordinateDiff{1, 1}, // below_pads Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // below_pads
Strides{1, 1}, // data_dilation CoordinateDiff{1, 1}, // above_pads
C, Strides{1, 1}, // data_dilation
D, C,
E, D,
F, E,
G, F,
H, G,
I, H,
J, I,
true); J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -350,7 +353,7 @@ TEST(builder, scaled_QC_with_bias_add_and_relu) ...@@ -350,7 +353,7 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto result = backend->create_tensor(element::u8, shape_r); auto result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d}); handle->call_with_validate({result}, {a, b, c, d});
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}), EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 166, 142, 133}),
read_vector<uint8_t>(result)); read_vector<uint8_t>(result));
} }
...@@ -375,24 +378,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu) ...@@ -375,24 +378,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto H = make_shared<op::Parameter>(element::f32, Shape{1}); auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto I = make_shared<op::Parameter>(element::f32, Shape{1}); auto I = make_shared<op::Parameter>(element::f32, Shape{1});
auto J = make_shared<op::Parameter>(element::f32, Shape{1}); auto J = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A, auto CV =
B, ngraph::builder::QuantizedConvolutionBiasAddBuilder(A,
Bias, B,
Add, Bias,
Strides{1, 1}, // move_strides Add,
Strides{1, 1}, // filter_dilation Strides{1, 1}, // move_strides
CoordinateDiff{1, 1}, // below_pads Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // below_pads
Strides{1, 1}, // data_dilation CoordinateDiff{1, 1}, // above_pads
C, Strides{1, 1}, // data_dilation
D, C,
E, D,
F, E,
G, F,
H, G,
I, H,
J, I,
true); J,
true);
auto f = make_shared<Function>(NodeVector{CV}, auto f = make_shared<Function>(NodeVector{CV},
ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J}); ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J});
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -424,7 +428,7 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu) ...@@ -424,7 +428,7 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto result = backend->create_tensor(element::u8, shape_r); auto result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i, j, k, l}); handle->call_with_validate({result}, {a, b, c, d, e, e_a, g, h, i, j, k, l});
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}), EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 166, 142, 133}),
read_vector<uint8_t>(result)); read_vector<uint8_t>(result));
} }
...@@ -449,25 +453,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu) ...@@ -449,25 +453,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f}); auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f}); auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {90.0f}); auto J = op::Constant::create(element::f32, Shape{}, {90.0f});
auto CV = auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A, A,
B, B,
Bias, Bias,
Add, Add,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H, H,
I, I,
J, J,
true); true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -511,25 +515,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu_nhwc) ...@@ -511,25 +515,25 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu_nhwc)
auto H = op::Constant::create(element::f32, Shape{}, {90.0f}); auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f}); auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {90.0f}); auto J = op::Constant::create(element::f32, Shape{}, {90.0f});
auto CV = auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A_reshape, A_reshape,
B_reshape, B_reshape,
Bias, Bias,
Add_reshape, Add_reshape,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H, H,
I, I,
J, J,
true); true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -570,25 +574,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_signed_add_and_relu) ...@@ -570,25 +574,25 @@ TEST(builder, dynamic_scaled_QC_with_bias_signed_add_and_relu)
auto H = make_shared<op::Parameter>(element::f32, Shape{1}); auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto I = make_shared<op::Parameter>(element::f32, Shape{1}); auto I = make_shared<op::Parameter>(element::f32, Shape{1});
auto J = make_shared<op::Parameter>(element::f32, Shape{1}); auto J = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = auto CV = ngraph::builder::QuantizedConvolutionBiasSignedAddBuilder(
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A, A,
B, B,
Bias, Bias,
Add, Add,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H, H,
I, I,
J, J,
true); true);
auto f = make_shared<Function>(NodeVector{CV}, auto f = make_shared<Function>(NodeVector{CV},
ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J}); ParameterVector{A, B, Bias, Add, C, D, E, F, G, H, I, J});
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -641,21 +645,21 @@ TEST(builder, scaled_QC_with_f32_bias_and_relu) ...@@ -641,21 +645,21 @@ TEST(builder, scaled_QC_with_f32_bias_and_relu)
auto F = op::Constant::create(element::f32, Shape{}, {127.0f}); auto F = op::Constant::create(element::f32, Shape{}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{}, {20.0f}); auto G = op::Constant::create(element::f32, Shape{}, {20.0f});
auto H = op::Constant::create(element::f32, Shape{}, {-24.0f}); auto H = op::Constant::create(element::f32, Shape{}, {-24.0f});
auto CV = ngraph::builder::ScaledQuantizedConvolutionBias(A, auto CV = ngraph::builder::QuantizedConvolutionBiasBuilder(A,
B, B,
Bias, Bias,
Strides{1, 1}, // move_strides Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation Strides{1, 1}, // data_dilation
C, C,
D, D,
E, E,
F, F,
G, G,
H, H,
true); true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias}); auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -681,7 +685,7 @@ TEST(builder, scaled_Q_unsigned) ...@@ -681,7 +685,7 @@ TEST(builder, scaled_Q_unsigned)
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = op::Constant::create(element::f32, Shape{}, {-255.0f}); auto B = op::Constant::create(element::f32, Shape{}, {-255.0f});
auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::u8, quantization_axes, round_mode); auto QT = ngraph::builder::QuantizeBuilder(A, B, C, element::u8, quantization_axes, round_mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A}); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -706,7 +710,7 @@ TEST(builder, dynamic_scaled_Q) ...@@ -706,7 +710,7 @@ TEST(builder, dynamic_scaled_Q)
auto A = make_shared<op::Parameter>(element::f32, in_shape); auto A = make_shared<op::Parameter>(element::f32, in_shape);
auto B = make_shared<op::Parameter>(element::f32, Shape{}); auto B = make_shared<op::Parameter>(element::f32, Shape{});
auto C = make_shared<op::Parameter>(element::f32, Shape{}); auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, type, AxisSet{}, mode); auto QT = ngraph::builder::QuantizeBuilder(A, B, C, type, AxisSet{}, mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A, B, C}); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A, B, C});
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(element::f32, in_shape); auto a = backend->create_tensor(element::f32, in_shape);
...@@ -772,7 +776,7 @@ TEST(builder, scaled_Q_signed) ...@@ -772,7 +776,7 @@ TEST(builder, scaled_Q_signed)
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = op::Constant::create(element::f32, Shape{}, {-127.0f}); auto B = op::Constant::create(element::f32, Shape{}, {-127.0f});
auto C = op::Constant::create(element::f32, Shape{}, {127.0f}); auto C = op::Constant::create(element::f32, Shape{}, {127.0f});
auto QT = ngraph::builder::ScaledQuantize(A, B, C, element::i8, quantization_axes, round_mode); auto QT = ngraph::builder::QuantizeBuilder(A, B, C, element::i8, quantization_axes, round_mode);
auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A}); auto f = make_shared<Function>(NodeVector{QT}, ParameterVector{A});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -792,7 +796,7 @@ TEST(builder, scaled_DQ_signed) ...@@ -792,7 +796,7 @@ TEST(builder, scaled_DQ_signed)
auto A = make_shared<op::Parameter>(element::i8, Shape{1}); auto A = make_shared<op::Parameter>(element::i8, Shape{1});
auto B = op::Constant::create(element::f32, Shape{}, {-1.0f}); auto B = op::Constant::create(element::f32, Shape{}, {-1.0f});
auto C = op::Constant::create(element::f32, Shape{}, {300.0f}); auto C = op::Constant::create(element::f32, Shape{}, {300.0f});
auto r = ngraph::builder::ScaledDequantize(A, B, C, element::f32, quantization_axes); auto r = ngraph::builder::DequantizeBuilder(A, B, C, element::f32, quantization_axes);
auto f = make_shared<Function>(r, ParameterVector{A}); auto f = make_shared<Function>(r, ParameterVector{A});
constant_fold(f); constant_fold(f);
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
...@@ -816,7 +820,7 @@ shared_ptr<runtime::Tensor> call_SDQ(shared_ptr<runtime::Backend>& backend, ...@@ -816,7 +820,7 @@ shared_ptr<runtime::Tensor> call_SDQ(shared_ptr<runtime::Backend>& backend,
auto A = make_shared<op::Parameter>(type, in_shape); auto A = make_shared<op::Parameter>(type, in_shape);
auto B = make_shared<op::Parameter>(element::f32, Shape{}); auto B = make_shared<op::Parameter>(element::f32, Shape{});
auto C = make_shared<op::Parameter>(element::f32, Shape{}); auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto DQT = ngraph::builder::ScaledDequantize(A, B, C, element::f32, AxisSet{}); auto DQT = ngraph::builder::DequantizeBuilder(A, B, C, element::f32, AxisSet{});
auto f = make_shared<Function>(NodeVector{DQT}, ParameterVector{A, B, C}); auto f = make_shared<Function>(NodeVector{DQT}, ParameterVector{A, B, C});
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(type, in_shape); auto a = backend->create_tensor(type, in_shape);
...@@ -857,7 +861,7 @@ TEST(builder, scaled_quantize_concat_unsigned) ...@@ -857,7 +861,7 @@ TEST(builder, scaled_quantize_concat_unsigned)
auto Cn = make_shared<op::Parameter>(element::f32, Shape{1}); auto Cn = make_shared<op::Parameter>(element::f32, Shape{1});
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1}); auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{8, 2}; Shape shape_r{8, 2};
auto QConcat = ngraph::builder::ScaledQuantizedConcat( auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx}); NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat}, auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx}); ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
...@@ -906,7 +910,7 @@ TEST(builder, scaled_quantize_concat_signed) ...@@ -906,7 +910,7 @@ TEST(builder, scaled_quantize_concat_signed)
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1}); auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{8, 2}; Shape shape_r{8, 2};
auto QConcat = ngraph::builder::ScaledQuantizedConcat( auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx}); NodeVector{A, B, C}, 0, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat}, auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx}); ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
...@@ -954,7 +958,7 @@ TEST(builder, scaled_quantize_concat_unsigned_varying) ...@@ -954,7 +958,7 @@ TEST(builder, scaled_quantize_concat_unsigned_varying)
auto Cn = make_shared<op::Parameter>(element::f32, Shape{1}); auto Cn = make_shared<op::Parameter>(element::f32, Shape{1});
auto Cx = make_shared<op::Parameter>(element::f32, Shape{1}); auto Cx = make_shared<op::Parameter>(element::f32, Shape{1});
Shape shape_r{2, 9}; Shape shape_r{2, 9};
auto QConcat = ngraph::builder::ScaledQuantizedConcat( auto QConcat = ngraph::builder::QuantizedConcatBuilder(
NodeVector{A, B, C}, 1, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx}); NodeVector{A, B, C}, 1, NodeVector{An, Bn, Cn}, NodeVector{Ax, Bx, Cx});
auto f = make_shared<Function>(NodeVector{QConcat}, auto f = make_shared<Function>(NodeVector{QConcat},
ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx}); ParameterVector{A, B, C, An, Bn, Cn, Ax, Bx, Cx});
...@@ -1009,7 +1013,7 @@ TEST(builder, dynamic_scaled_QD_with_bias) ...@@ -1009,7 +1013,7 @@ TEST(builder, dynamic_scaled_QD_with_bias)
auto F = make_shared<op::Parameter>(element::f32, Shape{1}); auto F = make_shared<op::Parameter>(element::f32, Shape{1});
auto G = make_shared<op::Parameter>(element::f32, Shape{1}); auto G = make_shared<op::Parameter>(element::f32, Shape{1});
auto H = make_shared<op::Parameter>(element::f32, Shape{1}); auto H = make_shared<op::Parameter>(element::f32, Shape{1});
auto CV = ngraph::builder::ScaledQuantizedDotBias( auto CV = ngraph::builder::QuantizedDotBiasBuilder(
A, B, Bias, C, D, E, F, G, H, requantize, with_relu); A, B, Bias, C, D, E, F, G, H, requantize, with_relu);
return make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H}); return make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, C, D, E, F, G, H});
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment