Commit 8bd3846f authored by Adam Straw, committed by Robert Kimball

graph builders for quantize scale (#1976)

* quantize scale passing unit tests

* epsilon bump

* finished with quantization scale

* unit tests passing with convolution scale as builder

* broadcasted constants and cleanup

* api consistency for quant builders

* code style

* cleanup

* newline at EOF

* use requantization_scale

* drop TF license as we are no longer using TF code directly
parent 2a26558a
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
namespace ngraph
{
namespace builder
{
namespace detail
{
    // Builds a rank-0 (scalar) Constant of element type `type` whose single
    // value is `num` converted to the C++ storage type `Storage`.
    template <class Storage, class T>
    std::shared_ptr<Node> make_scalar_constant(const element::Type& type, const T& num)
    {
        return std::make_shared<ngraph::op::Constant>(
            type, ngraph::Shape{}, std::vector<Storage>{static_cast<Storage>(num)});
    }
}

// Creates a graph node holding the value `num` with element type `type`.
//
// A scalar Constant is always created first, with `num` cast to the C++ type
// that corresponds to `type`. When `shape` has at least one dimension, the
// scalar is wrapped in a Broadcast to `shape` over every axis of `shape`.
//
// type:  requested element type; one of f32, f64, i64, i32, i16, i8,
//        u64, u32, u16, u8.
// shape: shape of the returned node; an empty shape returns the scalar itself.
// num:   the value to store; must be static_cast-able to all of the
//        supported storage types since every branch is compiled.
//
// Throws ngraph_error if `type` is not one of the element types listed above.
template <class T>
std::shared_ptr<Node>
    make_constant(const element::Type& type, const Shape& shape, const T& num)
{
    std::shared_ptr<Node> scalar = nullptr;

    // Select the C++ storage type matching the requested element type.
    if (type == element::f32)
    {
        scalar = detail::make_scalar_constant<float>(type, num);
    }
    else if (type == element::f64)
    {
        scalar = detail::make_scalar_constant<double>(type, num);
    }
    else if (type == element::i64)
    {
        scalar = detail::make_scalar_constant<int64_t>(type, num);
    }
    else if (type == element::i32)
    {
        scalar = detail::make_scalar_constant<int32_t>(type, num);
    }
    else if (type == element::i16)
    {
        scalar = detail::make_scalar_constant<int16_t>(type, num);
    }
    else if (type == element::i8)
    {
        scalar = detail::make_scalar_constant<int8_t>(type, num);
    }
    else if (type == element::u64)
    {
        scalar = detail::make_scalar_constant<uint64_t>(type, num);
    }
    else if (type == element::u32)
    {
        scalar = detail::make_scalar_constant<uint32_t>(type, num);
    }
    else if (type == element::u16)
    {
        scalar = detail::make_scalar_constant<uint16_t>(type, num);
    }
    else if (type == element::u8)
    {
        scalar = detail::make_scalar_constant<uint8_t>(type, num);
    }
    else
    {
        throw ngraph_error("make_constant: Unsupported element type");
    }

    // A rank-0 request needs no broadcast: hand back the scalar as is.
    if (shape.empty())
    {
        return scalar;
    }

    // The scalar input has no axes of its own, so every axis of the target
    // shape is a broadcast axis.
    ngraph::AxisSet broadcast_axes;
    for (size_t axis = 0; axis < shape.size(); ++axis)
    {
        broadcast_axes.insert(axis);
    }
    return std::make_shared<ngraph::op::Broadcast>(scalar, shape, broadcast_axes);
}
}
}
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <memory> #include <memory>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp" #include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "quantization_util.hpp" #include "quantization_util.hpp"
...@@ -30,73 +31,72 @@ namespace ngraph ...@@ -30,73 +31,72 @@ namespace ngraph
std::shared_ptr<Node> ScaledQuantize(std::shared_ptr<Node> input, std::shared_ptr<Node> ScaledQuantize(std::shared_ptr<Node> input,
std::shared_ptr<Node> min, std::shared_ptr<Node> min,
std::shared_ptr<Node> max, std::shared_ptr<Node> max,
const ngraph::element::Type& type, const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes, const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode) op::Quantize::RoundMode round_mode)
{ {
auto offset = op::Constant::create(type, Shape{}, {0}); auto real_type = input->get_element_type();
if (input->get_element_type() == element::f32)
if (min->get_element_type() != real_type)
{ {
float scale = throw ngraph_error("ScaledQuantize: min must match input type");
builder::quantization_util::get_quantization_scale<float>(min, max, type, true);
auto quantize_scale =
op::Constant::create(input->get_element_type(), Shape{}, {scale});
return make_shared<op::Quantize>(
input, quantize_scale, offset, type, axes, round_mode);
} }
else if (input->get_element_type() == element::f64)
if (max->get_element_type() != real_type)
{ {
double scale = builder::quantization_util::get_quantization_scale<double>( throw ngraph_error("ScaledQuantize: max must match input type");
min, max, type, true);
auto quantize_scale =
op::Constant::create(input->get_element_type(), Shape{}, {scale});
return make_shared<op::Quantize>(
input, quantize_scale, offset, type, axes, round_mode);
} }
else
auto shape = min->get_shape();
if (shape != max->get_shape())
{ {
throw ngraph_error("Unsupported quantization element type"); throw ngraph_error("ScaledQuantize: min and max must have same shape");
} }
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type, true);
return make_shared<op::Quantize>(input, scale, zero, quant_type, axes, round_mode);
} }
std::shared_ptr<Node> ScaledDequantize(std::shared_ptr<Node> input, std::shared_ptr<Node> ScaledDequantize(std::shared_ptr<Node> input,
std::shared_ptr<Node> min, std::shared_ptr<Node> min,
std::shared_ptr<Node> max, std::shared_ptr<Node> max,
const ngraph::element::Type& type, const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes) const ngraph::AxisSet& axes)
{ {
auto input_et = input->get_element_type(); auto quant_type = input->get_element_type();
auto offset = op::Constant::create(input_et, Shape{}, {0});
if (type == element::f32) if (min->get_element_type() != real_type)
{ {
float scale = throw ngraph_error("ScaledDequantize: min must match output type");
builder::quantization_util::get_quantization_scale<float>(min, max, input_et);
auto dequantize_scale = op::Constant::create(type, Shape{}, {scale});
return make_shared<op::Dequantize>(input, dequantize_scale, offset, type, axes);
} }
else if (type == element::f64)
if (max->get_element_type() != real_type)
{ {
double scale = throw ngraph_error("ScaledDequantize: max must match output type");
builder::quantization_util::get_quantization_scale<double>(min, max, input_et);
auto dequantize_scale = op::Constant::create(type, Shape{}, {scale});
return make_shared<op::Dequantize>(input, dequantize_scale, offset, type, axes);
} }
else
auto shape = min->get_shape();
if (shape != max->get_shape())
{ {
throw ngraph_error("Unsupported dequantization element type"); throw ngraph_error("ScaledDequantize: min and max must have same shape");
} }
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type);
return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
} }
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
bool include_padding_in_avg_computation, bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max) std::shared_ptr<Node> max)
{ {
return make_shared<op::QuantizedAvgPool>(arg, return make_shared<op::QuantizedAvgPool>(input,
window_shape, window_shape,
window_movement_strides, window_movement_strides,
padding_below, padding_below,
...@@ -105,31 +105,30 @@ namespace ngraph ...@@ -105,31 +105,30 @@ namespace ngraph
} }
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& bias, std::shared_ptr<Node> bias,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output, std::shared_ptr<Node> max_freezed_output,
const bool with_relu) const bool with_relu)
{ {
float scale = builder::quantization_util::get_scale(min_input, auto requantization_scale = quantization_util::get_scale(min_input,
max_input, max_input,
min_filter, min_filter,
max_filter, max_filter,
min_freezed_output, min_freezed_output,
max_freezed_output); max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolutionBias>(input,
return make_shared<op::QuantizedConvolutionBias>(data_batch,
filters, filters,
bias, bias,
window_movement_strides, window_movement_strides,
...@@ -142,28 +141,28 @@ namespace ngraph ...@@ -142,28 +141,28 @@ namespace ngraph
} }
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output) std::shared_ptr<Node> max_freezed_output)
{ {
float scale = builder::quantization_util::get_scale(min_input, auto requantization_scale = quantization_util::get_scale(min_input,
max_input, max_input,
min_filter, min_filter,
max_filter, max_filter,
min_freezed_output, min_freezed_output,
max_freezed_output); max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolutionRelu>(data_batch, return make_shared<op::QuantizedConvolutionRelu>(input,
filters, filters,
window_movement_strides, window_movement_strides,
window_dilation_strides, window_dilation_strides,
...@@ -173,29 +172,28 @@ namespace ngraph ...@@ -173,29 +172,28 @@ namespace ngraph
requantization_scale); requantization_scale);
} }
std::shared_ptr<Node> std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& filters, const Strides& window_movement_strides,
const Strides& window_movement_strides, const Strides& window_dilation_strides,
const Strides& window_dilation_strides, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_above,
const CoordinateDiff& padding_above, const Strides& data_dilation_strides,
const Strides& data_dilation_strides, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> max_freezed_output)
const std::shared_ptr<Node> max_freezed_output)
{ {
float scale = builder::quantization_util::get_scale(min_input, auto requantization_scale = quantization_util::get_scale(min_input,
max_input, max_input,
min_filter, min_filter,
max_filter, max_filter,
min_freezed_output, min_freezed_output,
max_freezed_output); max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolution>(data_batch, return make_shared<op::QuantizedConvolution>(input,
filters, filters,
window_movement_strides, window_movement_strides,
window_dilation_strides, window_dilation_strides,
...@@ -205,16 +203,16 @@ namespace ngraph ...@@ -205,16 +203,16 @@ namespace ngraph
requantization_scale); requantization_scale);
} }
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max) std::shared_ptr<Node> max)
{ {
return make_shared<op::QuantizedMaxPool>( return make_shared<op::QuantizedMaxPool>(
arg, window_shape, window_movement_strides, padding_below, padding_above); input, window_shape, window_movement_strides, padding_below, padding_above);
} }
} }
} }
...@@ -43,68 +43,67 @@ namespace ngraph ...@@ -43,68 +43,67 @@ namespace ngraph
const ngraph::element::Type& type, const ngraph::element::Type& type,
const ngraph::AxisSet& axes); const ngraph::AxisSet& axes);
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
bool include_padding_in_avg_computation, bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max); std::shared_ptr<Node> max);
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& bias, std::shared_ptr<Node> bias,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output, std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false); const bool with_relu = false);
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output); std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& filters, const Strides& window_movement_strides,
const Strides& window_movement_strides, const Strides& window_dilation_strides,
const Strides& window_dilation_strides, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_above,
const CoordinateDiff& padding_above, const Strides& data_dilation_strides,
const Strides& data_dilation_strides, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> max_freezed_output);
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max); std::shared_ptr<Node> max);
} }
} }
...@@ -13,25 +13,22 @@ ...@@ -13,25 +13,22 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#pragma once #pragma once
#include <limits> #include <limits>
#include <vector> #include <vector>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
namespace ngraph namespace ngraph
...@@ -40,117 +37,141 @@ namespace ngraph ...@@ -40,117 +37,141 @@ namespace ngraph
{ {
namespace quantization_util namespace quantization_util
{ {
template <class T1, class T2, class T3> std::shared_ptr<Node> max_abs(std::shared_ptr<Node> a, std::shared_ptr<Node> b)
void quantization_range_for_multiplication(
float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
{ {
// begin code copied and pasted (and modified) from auto abs_a = std::make_shared<op::Abs>(a);
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h auto abs_b = std::make_shared<op::Abs>(b);
float a_one_quant_level = (max_a - min_a) / (std::numeric_limits<T1>::max() - return std::make_shared<op::Maximum>(abs_a, abs_b);
std::numeric_limits<T1>::min());
float b_one_quant_level = (max_b - min_b) / (std::numeric_limits<T2>::max() -
std::numeric_limits<T2>::min());
float c_one_quant_level = a_one_quant_level * b_one_quant_level;
*min_c = c_one_quant_level * std::numeric_limits<T3>::min();
*max_c = c_one_quant_level * std::numeric_limits<T3>::max();
// end code copied and pasted (and modified) from
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
} }
float get_scale(const std::shared_ptr<Node> min_input, std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>
const std::shared_ptr<Node> max_input, quantization_range_for_multiplication(std::shared_ptr<Node> min_a,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> max_a,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> min_b,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> max_b)
const std::shared_ptr<Node> max_freezed_output)
{ {
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input); auto type = min_a->get_element_type();
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input); if (type != max_a->get_element_type() || type != min_b->get_element_type() ||
auto min_filter_const_op = type != max_b->get_element_type())
std::static_pointer_cast<ngraph::op::Constant>(min_filter); {
auto max_filter_const_op = throw ngraph_error(
std::static_pointer_cast<ngraph::op::Constant>(max_filter); "quantization_range_for_multiplication: min and max must have same type");
auto min_freezed_output_const_op = }
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op = auto shape = min_a->get_shape();
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output); if (shape != max_a->get_shape() || shape != min_b->get_shape() ||
auto input_min = min_input_const_op->get_vector<float>(); shape != max_b->get_shape())
auto input_max = max_input_const_op->get_vector<float>(); {
auto filter_min = min_filter_const_op->get_vector<float>(); throw ngraph_error(
auto filter_max = max_filter_const_op->get_vector<float>(); "quantization_range_for_multiplication: min and max must have same shape");
auto output_min = min_freezed_output_const_op->get_vector<float>(); }
auto output_max = max_freezed_output_const_op->get_vector<float>();
auto u8_range = make_constant(type,
float min_out_value; shape,
float max_out_value; std::numeric_limits<uint8_t>::max() -
quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(input_min[0], std::numeric_limits<uint8_t>::min());
input_max[0], auto i8_range = make_constant(type,
filter_min[0], shape,
filter_max[0], std::numeric_limits<int8_t>::max() -
&min_out_value, std::numeric_limits<int8_t>::min());
&max_out_value);
const float max_abs32 = std::max(std::abs(min_out_value), std::abs(max_out_value)); auto a_one_quant_level = (max_a - min_a) / u8_range;
const float max_abs8 = std::max(std::abs(output_min[0]), std::abs(output_max[0])); auto b_one_quant_level = (max_b - min_b) / i8_range;
auto c_one_quant_level = a_one_quant_level * b_one_quant_level;
auto i32_min = make_constant(type, shape, std::numeric_limits<int32_t>::min());
auto i32_max = make_constant(type, shape, std::numeric_limits<int32_t>::max());
auto min_c = c_one_quant_level * i32_min;
auto max_c = c_one_quant_level * i32_max;
return std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>(min_c, max_c);
}
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output)
{
auto type = min_input->get_element_type();
if (type != max_input->get_element_type() ||
type != min_filter->get_element_type() ||
type != max_filter->get_element_type() ||
type != min_freezed_output->get_element_type() ||
type != max_freezed_output->get_element_type())
{
throw ngraph_error("get_scale: min and max must have same type");
}
auto shape = min_input->get_shape();
if (shape != max_input->get_shape() || shape != min_filter->get_shape() ||
shape != max_filter->get_shape() || shape != min_freezed_output->get_shape() ||
shape != max_freezed_output->get_shape())
{
throw ngraph_error("get_scale: min and max must have same shape");
}
auto ranges = quantization_range_for_multiplication(
min_input, max_input, min_filter, max_filter);
auto min_out_value = ranges.first;
auto max_out_value = ranges.second;
auto max_abs32 = max_abs(min_out_value, max_out_value);
auto max_abs8 = max_abs(min_freezed_output, max_freezed_output);
// Output is signed int. // Output is signed int.
// s32 = f32 * std::pow(2, 31)/ max_abs32; // s32 = f32 * std::pow(2, 31)/ max_abs32;
// s8 = f32 * std::pow(2, 7)/ max_abs8; // s8 = f32 * std::pow(2, 7)/ max_abs8;
// s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8; // s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
const float scale = static_cast<float>( return make_constant(type, shape, std::pow(2, -24)) * (max_abs32 / max_abs8);
(std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8)));
return scale;
} }
template <typename T> std::shared_ptr<Node> get_scale(std::shared_ptr<Node> input_min_range,
static inline T get_quantization_scale(const std::shared_ptr<Node> min_input, std::shared_ptr<Node> input_max_range,
const std::shared_ptr<Node> max_input, const ngraph::element::Type& quant_type,
const ngraph::element::Type& type, bool bump_by_eps = false)
bool bump_by_eps = false)
{ {
auto min_input_const_op = auto type = input_min_range->get_element_type();
std::dynamic_pointer_cast<ngraph::op::Constant>(min_input); if (type != input_max_range->get_element_type())
auto max_input_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(max_input);
if (min_input_const_op == nullptr)
{ {
throw ngraph_error("min input must be constant"); throw ngraph_error("get_scale: min and max must have same type");
} }
else if (max_input_const_op == nullptr)
auto shape = input_min_range->get_shape();
if (shape != input_max_range->get_shape())
{ {
throw ngraph_error("max input must be constant"); throw ngraph_error("get_scale: min and max must have same shape");
} }
auto input_min_range = min_input_const_op->get_vector<T>(); auto min_range = input_min_range;
auto input_max_range = max_input_const_op->get_vector<T>(); auto max_range = input_max_range;
T min_range = std::numeric_limits<T>::min();
T max_range = std::numeric_limits<T>::max();
if (bump_by_eps) if (bump_by_eps)
{ {
// If input_min_range and input_max_range are close, auto zero = make_constant(type, shape, 0);
// introduce a slightly larger delta between them. min_range = std::make_shared<op::Minimum>(zero, input_min_range);
min_range = std::min(static_cast<T>(0.0f), input_min_range[0]);
const T epsilon = std::max(static_cast<T>(1.0f), auto max_abs_input_range = max_abs(input_min_range, input_max_range);
static_cast<T>(std::max(fabs(input_min_range[0]),
fabs(input_max_range[0])))) / auto one = make_constant(type, shape, 1);
static_cast<T>(100.0f); auto hundred = make_constant(type, shape, 100);
max_range = std::max(input_max_range[0], min_range + epsilon); auto epsilon =
max_range = std::max(static_cast<T>(0.0f), max_range); std::make_shared<op::Maximum>(one, max_abs_input_range) / hundred;
// end code copied and pasted from
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantize_op.cc max_range = std::make_shared<op::Maximum>(input_max_range, min_range + epsilon);
} max_range = std::make_shared<op::Maximum>(zero, max_range);
else
{
min_range = input_min_range[0];
max_range = input_max_range[0];
} }
const T max_abs = std::max(std::abs(min_range), std::abs(max_range)); size_t bw = quant_type.bitwidth();
const T bitwidth = type.bitwidth(); float range = static_cast<float>(
const T target_range = static_cast<T>( (quant_type.is_signed() ? std::pow(2, (bw - 1)) : std::pow(2, bw)) - 1);
(type.is_signed() ? std::pow(2, (bitwidth - 1)) : std::pow(2, bitwidth)) - 1);
const T scale_factor = max_abs / target_range; auto max_abs_range = max_abs(min_range, max_range);
return scale_factor; auto target_range = make_constant(type, shape, range);
return max_abs_range / target_range;
} }
} }
} }
......
...@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
set_output_type(0, set_output_type(0,
element::i8, element::i8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
......
...@@ -41,7 +41,6 @@ namespace ngraph ...@@ -41,7 +41,6 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_scale() const { return m_scale; }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
...@@ -51,7 +50,6 @@ namespace ngraph ...@@ -51,7 +50,6 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_scale;
}; };
} }
} }
...@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d ...@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
// TODO: call ngraph util // TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape()); // util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......
...@@ -47,7 +47,6 @@ namespace ngraph ...@@ -47,7 +47,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_bias() { return get_argument(2); } std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
...@@ -62,7 +61,6 @@ namespace ngraph ...@@ -62,7 +61,6 @@ namespace ngraph
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
bool m_with_relu; bool m_with_relu;
float m_scale;
}; };
} }
} }
...@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod ...@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
set_output_type(0, set_output_type(0,
element::u8, element::u8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
......
...@@ -43,7 +43,6 @@ namespace ngraph ...@@ -43,7 +43,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
...@@ -55,7 +54,6 @@ namespace ngraph ...@@ -55,7 +54,6 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_scale;
}; };
} }
} }
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ngraph/op/negative.hpp" #include "ngraph/op/negative.hpp"
#include "ngraph/op/pad.hpp" #include "ngraph/op/pad.hpp"
#include "ngraph/op/quantize.hpp" #include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp" #include "ngraph/op/reshape.hpp"
#include "ngraph/op/subtract.hpp" #include "ngraph/op/subtract.hpp"
#include "ngraph/pattern/matcher.hpp" #include "ngraph/pattern/matcher.hpp"
...@@ -45,6 +46,7 @@ ...@@ -45,6 +46,7 @@
#include "ngraph/runtime/reference/negate.hpp" #include "ngraph/runtime/reference/negate.hpp"
#include "ngraph/runtime/reference/pad.hpp" #include "ngraph/runtime/reference/pad.hpp"
#include "ngraph/runtime/reference/quantize.hpp" #include "ngraph/runtime/reference/quantize.hpp"
#include "ngraph/runtime/reference/relu.hpp"
#include "ngraph/runtime/reference/reshape.hpp" #include "ngraph/runtime/reference/reshape.hpp"
#include "ngraph/runtime/reference/subtract.hpp" #include "ngraph/runtime/reference/subtract.hpp"
...@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary() ...@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary()
bool is_supported_unary_op(std::shared_ptr<Node> n) bool is_supported_unary_op(std::shared_ptr<Node> n)
{ {
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n); return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n) ||
std::dynamic_pointer_cast<op::Relu>(n);
} }
template <class T> template <class T>
...@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant, ...@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant,
runtime::reference::negate<T>( runtime::reference::negate<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape)); constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
} }
else if (std::dynamic_pointer_cast<op::Relu>(unary))
{
runtime::reference::relu<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else else
{ {
NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op"; NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op";
......
...@@ -118,6 +118,7 @@ ...@@ -118,6 +118,7 @@
#include "ngraph/op/topk.hpp" #include "ngraph/op/topk.hpp"
#include "ngraph/pass/algebraic_simplification.hpp" #include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/common_function_collection.hpp" #include "ngraph/pass/common_function_collection.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/pass/core_fusion.hpp" #include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp" #include "ngraph/pass/cse.hpp"
#include "ngraph/pass/dump_sorted.hpp" #include "ngraph/pass/dump_sorted.hpp"
...@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma ...@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false); pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this); pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<ngraph::pass::ConstantFolding>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this); pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>( pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(
runtime::cpu::get_cse_handlers_map()); runtime::cpu::get_cse_handlers_map());
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp" #include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp" #include "ngraph/op/experimental/quantized_conv_bias.hpp"
...@@ -226,6 +227,16 @@ namespace ngraph ...@@ -226,6 +227,16 @@ namespace ngraph
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{ {
auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolution scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -234,12 +245,21 @@ namespace ngraph ...@@ -234,12 +245,21 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolution*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{ {
auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcr->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -248,12 +268,21 @@ namespace ngraph ...@@ -248,12 +268,21 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{ {
auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcb->get_arguments()[3]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
// conv+bias = cvt_to_int8(scale*(dst + bias)) // conv+bias = cvt_to_int8(scale*(dst + bias))
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution( return build_quantized_convolution(
...@@ -265,8 +294,7 @@ namespace ngraph ...@@ -265,8 +294,7 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment