Commit f3b9389c authored by Nishant Patel, committed by Scott Cyphers

Segregate the quant builders op wise (#3501)

* Segregate builders op wise

* Style

* Update ngraph.hpp
parent 98205845
...@@ -24,20 +24,23 @@ set (SRC ...@@ -24,20 +24,23 @@ set (SRC
axis_vector.hpp axis_vector.hpp
builder/autobroadcast.cpp builder/autobroadcast.cpp
builder/autobroadcast.hpp builder/autobroadcast.hpp
builder/dequantize_builder.cpp
builder/dequantize_builder.hpp
builder/make_constant.hpp builder/make_constant.hpp
builder/norm.cpp builder/norm.cpp
builder/norm.hpp builder/norm.hpp
builder/numpy_transpose.cpp builder/numpy_transpose.cpp
builder/numpy_transpose.hpp builder/numpy_transpose.hpp
builder/quantization.cpp builder/quantize_builder.cpp
builder/quantization.hpp builder/quantize_builder.hpp
builder/quantized_concat_builder.cpp
builder/quantized_concat_builder.hpp
builder/quantized_conv_builder.cpp builder/quantized_conv_builder.cpp
builder/quantized_conv_builder.hpp builder/quantized_conv_builder.hpp
builder/quantized_dot_builder.cpp builder/quantized_dot_builder.cpp
builder/quantized_dot_builder.hpp builder/quantized_dot_builder.hpp
builder/quantization/quantized_linear_convolution.cpp builder/quantization/quantized_linear_convolution.cpp
builder/quantization/quantized_linear_convolution.hpp builder/quantization/quantized_linear_convolution.hpp
builder/quantization_util.hpp
builder/quantization_utils.hpp builder/quantization_utils.hpp
builder/quantization_utils.cpp builder/quantization_utils.cpp
builder/reduce_ops.cpp builder/reduce_ops.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/dequantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        /// \brief Builds an op::Dequantize node whose scale is derived from a [min, max] range.
        ///
        /// The scale comes from quantization_utils::get_scale(min, max, quant_type), where
        /// quant_type is the element type of \p input. The zero point is a constant 0 of
        /// quant_type with the same shape as \p min.
        ///
        /// \param input     Quantized value to dequantize.
        /// \param min       Lower bound of the range; its element type must equal \p real_type.
        /// \param max       Upper bound of the range; its element type must equal \p real_type
        ///                  and its shape must equal the shape of \p min.
        /// \param real_type Element type of the dequantized output.
        /// \param axes      Axes forwarded unchanged to op::Dequantize.
        /// \return The newly created op::Dequantize node.
        /// \throws ngraph_error if min/max do not have element type \p real_type, or if their
        ///         shapes differ.
        shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
                                           const Output<Node>& min,
                                           const Output<Node>& max,
                                           const ngraph::element::Type& real_type,
                                           const ngraph::AxisSet& axes)
        {
            auto quant_type = input.get_element_type();

            // min/max are compared against real_type, which is the *output* type of the
            // dequantize (the input is quantized), so the messages name the output type.
            if (min.get_element_type() != real_type)
            {
                throw ngraph_error("DequantizeBuilder: min must match output type");
            }

            if (max.get_element_type() != real_type)
            {
                throw ngraph_error("DequantizeBuilder: max must match output type");
            }

            auto shape = min.get_shape();
            if (shape != max.get_shape())
            {
                throw ngraph_error("DequantizeBuilder: min and max must have same shape");
            }

            auto zero = make_constant(quant_type, shape, 0);
            auto scale = quantization_utils::get_scale(min, max, quant_type);
            return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Builds an op::Dequantize node whose scale is derived from a [min, max] range.
///
/// \param input     Quantized value to dequantize; its element type is used as the
///                  quantized type when computing the scale and the zero constant.
/// \param min       Lower bound of the range; element type must equal \p real_type.
/// \param max       Upper bound of the range; element type must equal \p real_type and
///                  shape must equal the shape of \p min.
/// \param real_type Element type of the dequantized output.
/// \param axes      Axes forwarded to op::Dequantize.
/// \return The created op::Dequantize node.
/// \throws ngraph_error on an element-type or shape mismatch of min/max.
std::shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes);
}
}
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
namespace ngraph
{
namespace builder
{
// NOTE(review): only declarations are visible for the functions in this header; the
// comments below describe the signatures and hedge anything about behavior.

/// \brief Declares a builder returning a node for quantizing \p input to \p type.
/// NOTE(review): presumably min/max describe the real-valued range used to derive the
/// scale -- confirm against the implementation.
std::shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
/// \brief Declares a builder returning a node for dequantizing \p input to \p type.
/// NOTE(review): presumably the inverse of ScaledQuantize -- confirm.
std::shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes);
/// \brief Declares a builder returning a node that concatenates \p args along
/// \p concatenation_axis, given one min node and one max node per argument.
std::shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxes);
/// \brief Declares a builder for a quantized convolution with bias.
/// Takes the convolution geometry (strides, dilations, padding) plus min/max nodes for
/// the input, filter and output. \p with_relu defaults to false.
std::shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
/// \brief Declares a builder for a quantized convolution followed by relu (no bias input).
/// Takes the convolution geometry plus min/max nodes for the input, filter and output.
std::shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
/// \brief Declares a builder for a quantized convolution with bias and an extra
/// \p sum_input operand, which has its own min/max range nodes.
/// \p with_relu defaults to false.
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
/// \brief Same parameter list as ScaledQuantizedConvolutionBiasAdd.
/// NOTE(review): presumably the variant for a signed sum input -- confirm.
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
/// \brief Declares a builder for a quantized dot with bias.
/// Takes min/max nodes for the input, filter and output. \p requantize defaults to true
/// and \p with_relu defaults to false.
std::shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
} // namespace builder
} // namespace ngraph
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp" #include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/axis_set.hpp" #include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp" #include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp" #include "ngraph/op/dequantize.hpp"
......
This diff is collapsed.
...@@ -74,6 +74,128 @@ namespace ngraph ...@@ -74,6 +74,128 @@ namespace ngraph
return max_abs_range / target_range; return max_abs_range / target_range;
} }
/// \brief Derives the scale used to quantize a bias from the input and filter ranges.
///
/// The result is max_abs(input range) * max_abs(filter range), divided by a constant that
/// holds uint8 max times int8 max.
///
/// \throws ngraph_error if the four range nodes do not share one element type and one shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                     Output<Node> max_input,
                                     Output<Node> min_filter,
                                     Output<Node> max_filter)
{
    // min_input is the reference; every other range node must agree with it.
    const auto ref_type = min_input.get_element_type();
    const auto type_differs = [&ref_type](const Output<Node>& value) {
        return ref_type != value.get_element_type();
    };
    if (type_differs(max_input) || type_differs(min_filter) || type_differs(max_filter))
    {
        throw ngraph_error("get_bias_scale: min and max must have same type");
    }

    const auto ref_shape = min_input.get_shape();
    const auto shape_differs = [&ref_shape](const Output<Node>& value) {
        return ref_shape != value.get_shape();
    };
    if (shape_differs(max_input) || shape_differs(min_filter) || shape_differs(max_filter))
    {
        throw ngraph_error("get_bias_scale: min and max must have same shape");
    }

    auto input_abs_max = max_abs(min_input, max_input);
    auto filter_abs_max = max_abs(min_filter, max_filter);

    // Integer promotion makes this an int product of the two limits.
    const auto full_range =
        std::numeric_limits<uint8_t>::max() * std::numeric_limits<int8_t>::max();
    auto range_node = make_constant(ref_type, ref_shape, full_range);

    // Inverting the scale calculation here as the Quantize op passes scale as 1/scale.
    auto combined_abs_max = input_abs_max * filter_abs_max;
    return combined_abs_max / range_node;
}
/// \brief Computes the ratio max_abs(second conv output range) / max_abs(first conv output
///        range).
///
/// \throws ngraph_error if the four range nodes do not share one element type and one shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                    Output<Node> max_freezed_output_conv_1,
                                    Output<Node> min_freezed_output_conv_2,
                                    Output<Node> max_freezed_output_conv_2)
{
    // The first conv's min is the reference for both the type and the shape checks.
    const auto ref_type = min_freezed_output_conv_1.get_element_type();
    const auto type_differs = [&ref_type](const Output<Node>& value) {
        return ref_type != value.get_element_type();
    };
    if (type_differs(max_freezed_output_conv_1) || type_differs(min_freezed_output_conv_2) ||
        type_differs(max_freezed_output_conv_2))
    {
        throw ngraph_error("get_sum_scale: min and max must have same type");
    }

    const auto ref_shape = min_freezed_output_conv_1.get_shape();
    const auto shape_differs = [&ref_shape](const Output<Node>& value) {
        return ref_shape != value.get_shape();
    };
    if (shape_differs(max_freezed_output_conv_1) ||
        shape_differs(min_freezed_output_conv_2) || shape_differs(max_freezed_output_conv_2))
    {
        throw ngraph_error("get_sum_scale: min and max must have same shape");
    }

    auto first_abs_max = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
    auto second_abs_max = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
    return second_abs_max / first_abs_max;
}
/// \brief Computes the scale for a quantized dot from input, filter and output ranges.
///
/// Three scales are obtained through get_scale: one for the input range using
/// \p input_type, one for the filter range using element::i8, and one for the output range
/// using \p output_type. The result is data_scale * weight_scale, further divided by the
/// output scale when \p requantize is true.
///
/// \throws ngraph_error if the six range nodes do not share one element type and one shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                    Output<Node> max_input,
                                    Output<Node> min_filter,
                                    Output<Node> max_filter,
                                    Output<Node> min_freezed_output,
                                    Output<Node> max_freezed_output,
                                    const ngraph::element::Type& input_type,
                                    const ngraph::element::Type& output_type,
                                    const bool requantize)
{
    // min_input is the reference; every other range node must agree with it.
    const auto ref_type = min_input.get_element_type();
    const auto type_differs = [&ref_type](const Output<Node>& value) {
        return ref_type != value.get_element_type();
    };
    if (type_differs(max_input) || type_differs(min_filter) || type_differs(max_filter) ||
        type_differs(min_freezed_output) || type_differs(max_freezed_output))
    {
        throw ngraph_error("get_dot_scale: min and max must have same type");
    }

    const auto ref_shape = min_input.get_shape();
    const auto shape_differs = [&ref_shape](const Output<Node>& value) {
        return ref_shape != value.get_shape();
    };
    if (shape_differs(max_input) || shape_differs(min_filter) || shape_differs(max_filter) ||
        shape_differs(min_freezed_output) || shape_differs(max_freezed_output))
    {
        throw ngraph_error("get_dot_scale: min and max must have same shape");
    }

    auto input_scale = get_scale(min_input, max_input, input_type);
    auto filter_scale = get_scale(min_filter, max_filter, element::i8);
    auto output_scale = get_scale(min_freezed_output, max_freezed_output, output_type);

    auto combined_scale = input_scale * filter_scale;
    if (!requantize)
    {
        return combined_scale;
    }
    return combined_scale / output_scale;
}
void
check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs)
{
auto size = args.size();
if (size != mins.size() || size != maxs.size())
{
throw ngraph_error("Min and Max node vectors must be of same length");
}
for (size_t i = 0; i < size; i++)
{
auto min = mins[i];
auto max = maxs[i];
auto type = min->get_element_type();
if (type != max->get_element_type())
{
throw ngraph_error("check_concat: min and max must have same type");
}
if (min->get_shape() != Shape{1} || max->get_shape() != Shape{1})
{
throw ngraph_error("check_concat: min/max shape not Shape{1}: " +
vector_to_string(min->get_shape()) +
vector_to_string(max->get_shape()));
}
}
}
} }
} }
} }
...@@ -43,6 +43,30 @@ namespace ngraph ...@@ -43,6 +43,30 @@ namespace ngraph
const Output<Node>& input_max_range, const Output<Node>& input_max_range,
const ngraph::element::Type& quant_type, const ngraph::element::Type& quant_type,
bool bump_by_eps = false); bool bump_by_eps = false);
/// \brief Returns max_abs(input range) * max_abs(filter range) divided by a constant
///        holding uint8 max * int8 max.
/// \throws ngraph_error if the four range nodes differ in element type or in shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter);
/// \brief Returns max_abs(conv_2 output range) / max_abs(conv_1 output range).
/// \throws ngraph_error if the four range nodes differ in element type or in shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
Output<Node> max_freezed_output_conv_1,
Output<Node> min_freezed_output_conv_2,
Output<Node> max_freezed_output_conv_2);
/// \brief Returns data_scale * weight_scale, divided by the output scale when
///        \p requantize is true.
///
/// data_scale is get_scale over the input range with \p input_type, weight_scale is
/// get_scale over the filter range with element::i8, and the output scale is get_scale
/// over the output range with \p output_type.
/// \throws ngraph_error if the six range nodes differ in element type or in shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
Output<Node> max_input,
Output<Node> min_filter,
Output<Node> max_filter,
Output<Node> min_freezed_output,
Output<Node> max_freezed_output,
const ngraph::element::Type& input_type,
const ngraph::element::Type& output_type,
const bool requantize = true);
/// \brief Validates the range vectors for a quantized concat.
///
/// Throws ngraph_error unless \p mins and \p maxs have the same length as \p args and each
/// min/max pair shares an element type and both have shape Shape{1}.
void check_concat(const NodeVector& args,
const NodeVector& mins,
const NodeVector& maxs);
} }
} }
} }
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantize_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        /// \brief Builds an op::Quantize node whose scale is derived from a [min, max] range.
        ///
        /// The scale comes from quantization_utils::get_scale(min, max, quant_type, true) and
        /// the zero point is a constant 0 of \p quant_type shaped like \p min.
        ///
        /// \param input      Real-valued node to quantize.
        /// \param min        Lower bound of the range; must have the element type of \p input.
        /// \param max        Upper bound of the range; must have the element type of \p input
        ///                   and the same shape as \p min.
        /// \param quant_type Element type of the quantized output.
        /// \param axes       Axes forwarded to op::Quantize.
        /// \param round_mode Rounding mode forwarded to op::Quantize.
        /// \throws ngraph_error on an element-type or shape mismatch of min/max.
        shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
                                         const Output<Node>& min,
                                         const Output<Node>& max,
                                         const ngraph::element::Type& quant_type,
                                         const ngraph::AxisSet& axes,
                                         op::Quantize::RoundMode round_mode)
        {
            const auto input_type = input.get_element_type();

            // Both range bounds must carry the same element type as the value being quantized.
            const auto require_input_type = [&input_type](const Output<Node>& bound,
                                                          const char* message) {
                if (bound.get_element_type() != input_type)
                {
                    throw ngraph_error(message);
                }
            };
            require_input_type(min, "QuantizeBuilder: min must match input type");
            require_input_type(max, "QuantizeBuilder: max must match input type");

            const auto range_shape = min.get_shape();
            if (range_shape != max.get_shape())
            {
                throw ngraph_error("QuantizeBuilder: min and max must have same shape");
            }

            auto zero_point = make_constant(quant_type, range_shape, 0);
            auto quant_scale = quantization_utils::get_scale(min, max, quant_type, true);
            return make_shared<op::Quantize>(
                input, quant_scale, zero_point, quant_type, axes, round_mode);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Builds an op::Quantize node whose scale is derived from a [min, max] range.
///
/// \param input      Real-valued node to quantize.
/// \param min        Lower bound of the range; must have the element type of \p input.
/// \param max        Upper bound of the range; must have the element type of \p input and
///                   the same shape as \p min.
/// \param quant_type Element type of the quantized output.
/// \param axes       Axes forwarded to op::Quantize.
/// \param round_mode Rounding mode forwarded to op::Quantize.
/// \return The created op::Quantize node.
/// \throws ngraph_error on an element-type or shape mismatch of min/max.
std::shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
const Output<Node>& min,
const Output<Node>& max,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode);
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantized_concat_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace builder
    {
        /// \brief Concatenates quantized inputs after rescaling each one to a common scale.
        ///
        /// The output scale is get_scale over the minimum of all \p mins and the maximum of
        /// all \p maxs, using the element type of args[0]. Each argument is dequantized to
        /// f32 with its own scale, re-quantized to its own element type with the output
        /// scale (ROUND_NEAREST_TOWARD_EVEN), and the results are concatenated along
        /// \p concatenation_axis.
        ///
        /// \param args               Quantized nodes to concatenate; must not be empty.
        /// \param concatenation_axis Axis passed to the final op::Concat.
        /// \param mins               One range minimum per argument.
        /// \param maxs               One range maximum per argument.
        /// \return The op::Concat node over the rescaled arguments.
        /// \throws ngraph_error if \p args is empty or if check_concat rejects the inputs.
        shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
                                                size_t concatenation_axis,
                                                const NodeVector& mins,
                                                const NodeVector& maxs)
        {
            quantization_utils::check_concat(args, mins, maxs);

            // check_concat only compares the vector lengths, so three empty vectors pass it.
            // Reject that case explicitly instead of indexing args[0] out of bounds.
            if (args.empty())
            {
                throw ngraph_error("QuantizedConcatBuilder: args must not be empty");
            }

            auto quant_type = args[0]->get_element_type();

            // output scale
            auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
            auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
            auto out_scale = quantization_utils::get_scale(min, max, quant_type);

            NodeVector rescaled_args(args.size());
            for (size_t i = 0; i < args.size(); ++i)
            {
                auto q_type = args[i]->get_element_type();

                // Reshape this argument's scale from Shape{1} down to a scalar.
                auto in_scale = make_shared<ngraph::op::Reshape>(
                    quantization_utils::get_scale(mins[i], maxs[i], q_type),
                    AxisVector{0},
                    Shape{});
                auto zero = make_constant(q_type, in_scale->get_shape(), 0);

                // Dequantize with the argument's own scale, then re-quantize with the
                // shared output scale.
                shared_ptr<Node> dequantized =
                    make_shared<op::Dequantize>(args[i], in_scale, zero, element::f32, AxisSet{});
                rescaled_args[i] =
                    make_shared<op::Quantize>(dequantized,
                                              out_scale,
                                              zero,
                                              q_type,
                                              AxisSet{},
                                              op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
            }

            return make_shared<op::Concat>(rescaled_args, concatenation_axis);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Concatenates quantized inputs after rescaling each one to a common output scale.
///
/// \param args               Quantized nodes to concatenate.
/// \param concatenation_axis Axis passed to the final op::Concat.
/// \param mins               One range minimum per argument (validated by check_concat).
/// \param maxs               One range maximum per argument (validated by check_concat).
/// \return The op::Concat node over the rescaled arguments.
std::shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
size_t concatenation_axis,
const NodeVector& mins,
const NodeVector& maxs);
}
}
...@@ -18,6 +18,11 @@ ...@@ -18,6 +18,11 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_convolution.hpp" #include "ngraph/op/quantized_convolution.hpp"
#include "quantization_utils.hpp" #include "quantization_utils.hpp"
...@@ -43,5 +48,77 @@ namespace ngraph ...@@ -43,5 +48,77 @@ namespace ngraph
const ngraph::AxisSet& input_axes = ngraph::AxisSet{}, const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{}, const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
const ngraph::AxisSet& output_axes = ngraph::AxisSet{}); const ngraph::AxisSet& output_axes = ngraph::AxisSet{});
/// \brief Declares a builder for a quantized convolution with bias.
///
/// Takes the convolution geometry (strides, dilations, padding) plus min/max nodes for the
/// input, filter and output. \p with_relu defaults to false.
/// NOTE(review): the definition is not visible here; confirm how the ranges are turned
/// into scales.
std::shared_ptr<Node>
QuantizedConvolutionBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool with_relu = false);
/// \brief Declares a builder for a quantized convolution followed by relu (no bias input).
///
/// Takes the convolution geometry (strides, dilations, padding) plus min/max nodes for the
/// input, filter and output.
/// NOTE(review): the definition is not visible here; confirm behavior against it.
std::shared_ptr<Node>
QuantizedConvolutionReluBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output);
/// \brief Declares a builder for a quantized convolution with bias and an additional
///        \p sum_input operand.
///
/// Besides the convolution geometry and the input/filter/output ranges, it takes a
/// min/max range for \p sum_input. \p with_relu defaults to false.
/// NOTE(review): the definition is not visible here; confirm behavior against it.
std::shared_ptr<Node>
QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
/// \brief Declares a builder with the same parameter list as
///        QuantizedConvolutionBiasAddBuilder.
///
/// NOTE(review): presumably the variant for a signed sum input; the definition is not
/// visible here -- confirm against the implementation.
std::shared_ptr<Node>
QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const Output<Node>& min_sum_input,
const Output<Node>& max_sum_input,
const bool with_relu = false);
} }
} }
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <memory> #include <memory>
#include "ngraph/builder/quantized_dot_builder.hpp" #include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/op/constant.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -65,5 +64,45 @@ namespace ngraph ...@@ -65,5 +64,45 @@ namespace ngraph
input1_axes, input1_axes,
output_axes); output_axes);
} }
/// \brief Builds an op::QuantizedDotBias node from real-valued input/filter/output ranges.
///
/// The requantization scale comes from quantization_utils::get_dot_scale, using the element
/// type of \p input and an output type of u8 when \p with_relu is set, i8 otherwise. A bias
/// whose element type is not i32 is first quantized to i32 with the scale from
/// quantization_utils::get_bias_scale (ROUND_NEAREST_TOWARD_EVEN, zero offset).
shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
                                         const Output<Node>& filters,
                                         const Output<Node>& bias,
                                         const Output<Node>& min_input,
                                         const Output<Node>& max_input,
                                         const Output<Node>& min_filter,
                                         const Output<Node>& max_filter,
                                         const Output<Node>& min_output,
                                         const Output<Node>& max_output,
                                         const bool requantize,
                                         const bool with_relu)
{
    auto dot_scale = quantization_utils::get_dot_scale(min_input,
                                                       max_input,
                                                       min_filter,
                                                       max_filter,
                                                       min_output,
                                                       max_output,
                                                       input.get_element_type(),
                                                       with_relu ? element::u8 : element::i8,
                                                       requantize);

    // A bias that is already i32 is forwarded untouched.
    if (bias.get_element_type() == element::i32)
    {
        return make_shared<op::QuantizedDotBias>(
            input, filters, bias, dot_scale, requantize, with_relu);
    }

    // Otherwise quantize the bias to i32 before handing it to the op.
    auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
    auto bias_scale =
        quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
    const Output<Node> quantized_bias =
        make_shared<op::Quantize>(bias,
                                  bias_scale,
                                  zero_point,
                                  element::i32,
                                  AxisSet{},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);

    return make_shared<op::QuantizedDotBias>(
        input, filters, quantized_bias, dot_scale, requantize, with_relu);
}
} }
} }
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_dot.hpp" #include "ngraph/op/quantized_dot.hpp"
#include "quantization_utils.hpp" #include "quantization_utils.hpp"
...@@ -38,5 +41,17 @@ namespace ngraph ...@@ -38,5 +41,17 @@ namespace ngraph
const ngraph::AxisSet& input0_axes, const ngraph::AxisSet& input0_axes,
const ngraph::AxisSet& input1_axes, const ngraph::AxisSet& input1_axes,
const ngraph::AxisSet& output_axes); const ngraph::AxisSet& output_axes);
/// \brief Builds an op::QuantizedDotBias node from real-valued input/filter/output ranges.
///
/// \param input      First operand; its element type is used when computing the scale.
/// \param filters    Second operand.
/// \param bias       Bias; quantized to i32 by the builder when it is not already i32.
/// \param min_input  Minimum of the input range.
/// \param max_input  Maximum of the input range.
/// \param min_filter Minimum of the filter range.
/// \param max_filter Maximum of the filter range.
/// \param min_output Minimum of the output range.
/// \param max_output Maximum of the output range.
/// \param requantize Forwarded to get_dot_scale and to the op; defaults to true.
/// \param with_relu  Selects u8 (true) or i8 (false) as the output type for the scale and
///                   is forwarded to the op; defaults to false.
std::shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const bool requantize = true,
const bool with_relu = false);
} }
} }
...@@ -62,8 +62,12 @@ namespace ngraph ...@@ -62,8 +62,12 @@ namespace ngraph
/// recipes, for example auto-broadcast. /// recipes, for example auto-broadcast.
#include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/numpy_transpose.hpp" #include "ngraph/builder/numpy_transpose.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp" #include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/builder/reduce_ops.hpp" #include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/builder/reshape.hpp" #include "ngraph/builder/reshape.hpp"
#include "ngraph/builder/tensor_mask.hpp" #include "ngraph/builder/tensor_mask.hpp"
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment