Commit 8bd3846f authored by Adam Straw, committed by Robert Kimball

graph builders for quantize scale (#1976)

* quantize scale passing unit tests

* epsilon bump

* finished with quantization scale

* unit tests passing with convolution scale as builder

* broadcasted constants and cleanup

* api consistency for quant builders

* code style

* cleanup

* newline at EOF

* use requantization_scale

* drop TF license as we are no longer using TF code directly
parent 2a26558a
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
namespace ngraph
{
namespace builder
{
// Helper for make_constant: builds a rank-0 (scalar) Constant node of
// element type `type`, converting `num` to the element's C++ type `U`.
template <class U, class T>
std::shared_ptr<Node> make_scalar_constant(const element::Type& type, const T& num)
{
    return std::make_shared<ngraph::op::Constant>(
        type, ngraph::Shape{}, std::vector<U>{static_cast<U>(num)});
}

/// \brief Create a Constant node of element type `type` holding the value
///        `num`, broadcast to `shape`.
///
/// \param type  Element type of the resulting constant.
/// \param shape Target shape; an empty shape yields a scalar constant.
/// \param num   Value to store (cast to the C++ type matching `type`).
/// \return A Constant node, wrapped in a Broadcast when `shape` is non-empty.
/// \throws ngraph_error when `type` is not a supported numeric element type.
template <class T>
std::shared_ptr<Node>
    make_constant(const element::Type& type, const Shape& shape, const T& num)
{
    std::shared_ptr<Node> val = nullptr;
    if (type == element::f32)
    {
        val = make_scalar_constant<float>(type, num);
    }
    else if (type == element::f64)
    {
        val = make_scalar_constant<double>(type, num);
    }
    else if (type == element::i64)
    {
        val = make_scalar_constant<int64_t>(type, num);
    }
    else if (type == element::i32)
    {
        val = make_scalar_constant<int32_t>(type, num);
    }
    else if (type == element::i16)
    {
        val = make_scalar_constant<int16_t>(type, num);
    }
    else if (type == element::i8)
    {
        val = make_scalar_constant<int8_t>(type, num);
    }
    else if (type == element::u64)
    {
        val = make_scalar_constant<uint64_t>(type, num);
    }
    else if (type == element::u32)
    {
        val = make_scalar_constant<uint32_t>(type, num);
    }
    else if (type == element::u16)
    {
        val = make_scalar_constant<uint16_t>(type, num);
    }
    else if (type == element::u8)
    {
        val = make_scalar_constant<uint8_t>(type, num);
    }
    else
    {
        throw ngraph_error("make_constant: Unsupported element type");
    }

    // A non-empty target shape means the caller wants a broadcasted
    // constant: broadcast the scalar along every axis of `shape`.
    if (shape.size() > 0)
    {
        ngraph::AxisSet axes;
        for (size_t i = 0; i < shape.size(); i++)
        {
            axes.insert(i);
        }
        val = std::make_shared<ngraph::op::Broadcast>(val, shape, axes);
    }
    return val;
}
}
}
......@@ -16,6 +16,7 @@
#include <memory>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp"
#include "quantization_util.hpp"
......@@ -30,73 +31,72 @@ namespace ngraph
std::shared_ptr<Node> ScaledQuantize(std::shared_ptr<Node> input,
std::shared_ptr<Node> min,
std::shared_ptr<Node> max,
const ngraph::element::Type& type,
const ngraph::element::Type& quant_type,
const ngraph::AxisSet& axes,
op::Quantize::RoundMode round_mode)
{
auto offset = op::Constant::create(type, Shape{}, {0});
if (input->get_element_type() == element::f32)
auto real_type = input->get_element_type();
if (min->get_element_type() != real_type)
{
float scale =
builder::quantization_util::get_quantization_scale<float>(min, max, type, true);
auto quantize_scale =
op::Constant::create(input->get_element_type(), Shape{}, {scale});
return make_shared<op::Quantize>(
input, quantize_scale, offset, type, axes, round_mode);
throw ngraph_error("ScaledQuantize: min must match input type");
}
else if (input->get_element_type() == element::f64)
if (max->get_element_type() != real_type)
{
double scale = builder::quantization_util::get_quantization_scale<double>(
min, max, type, true);
auto quantize_scale =
op::Constant::create(input->get_element_type(), Shape{}, {scale});
return make_shared<op::Quantize>(
input, quantize_scale, offset, type, axes, round_mode);
throw ngraph_error("ScaledQuantize: max must match input type");
}
else
auto shape = min->get_shape();
if (shape != max->get_shape())
{
throw ngraph_error("Unsupported quantization element type");
throw ngraph_error("ScaledQuantize: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type, true);
return make_shared<op::Quantize>(input, scale, zero, quant_type, axes, round_mode);
}
std::shared_ptr<Node> ScaledDequantize(std::shared_ptr<Node> input,
std::shared_ptr<Node> min,
std::shared_ptr<Node> max,
const ngraph::element::Type& type,
const ngraph::element::Type& real_type,
const ngraph::AxisSet& axes)
{
auto input_et = input->get_element_type();
auto offset = op::Constant::create(input_et, Shape{}, {0});
if (type == element::f32)
auto quant_type = input->get_element_type();
if (min->get_element_type() != real_type)
{
float scale =
builder::quantization_util::get_quantization_scale<float>(min, max, input_et);
auto dequantize_scale = op::Constant::create(type, Shape{}, {scale});
return make_shared<op::Dequantize>(input, dequantize_scale, offset, type, axes);
throw ngraph_error("ScaledDequantize: min must match output type");
}
else if (type == element::f64)
if (max->get_element_type() != real_type)
{
double scale =
builder::quantization_util::get_quantization_scale<double>(min, max, input_et);
auto dequantize_scale = op::Constant::create(type, Shape{}, {scale});
return make_shared<op::Dequantize>(input, dequantize_scale, offset, type, axes);
throw ngraph_error("ScaledDequantize: max must match output type");
}
else
auto shape = min->get_shape();
if (shape != max->get_shape())
{
throw ngraph_error("Unsupported dequantization element type");
throw ngraph_error("ScaledDequantize: min and max must have same shape");
}
auto zero = make_constant(quant_type, shape, 0);
auto scale = quantization_util::get_scale(min, max, quant_type);
return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
}
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max)
std::shared_ptr<Node> min,
std::shared_ptr<Node> max)
{
return make_shared<op::QuantizedAvgPool>(arg,
return make_shared<op::QuantizedAvgPool>(input,
window_shape,
window_movement_strides,
padding_below,
......@@ -105,31 +105,30 @@ namespace ngraph
}
std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output,
const bool with_relu)
{
float scale = builder::quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolutionBias>(data_batch,
auto requantization_scale = quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
return make_shared<op::QuantizedConvolutionBias>(input,
filters,
bias,
window_movement_strides,
......@@ -142,28 +141,28 @@ namespace ngraph
}
std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output)
{
float scale = builder::quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolutionRelu>(data_batch,
auto requantization_scale = quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
return make_shared<op::QuantizedConvolutionRelu>(input,
filters,
window_movement_strides,
window_dilation_strides,
......@@ -173,29 +172,28 @@ namespace ngraph
requantization_scale);
}
std::shared_ptr<Node>
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output)
{
float scale = builder::quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
auto requantization_scale = op::Constant::create(element::f32, Shape{1}, {scale});
return make_shared<op::QuantizedConvolution>(data_batch,
auto requantization_scale = quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output);
return make_shared<op::QuantizedConvolution>(input,
filters,
window_movement_strides,
window_dilation_strides,
......@@ -205,16 +203,16 @@ namespace ngraph
requantization_scale);
}
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max)
std::shared_ptr<Node> min,
std::shared_ptr<Node> max)
{
return make_shared<op::QuantizedMaxPool>(
arg, window_shape, window_movement_strides, padding_below, padding_above);
input, window_shape, window_movement_strides, padding_below, padding_above);
}
}
}
......@@ -43,68 +43,67 @@ namespace ngraph
const ngraph::element::Type& type,
const ngraph::AxisSet& axes);
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
std::shared_ptr<Node> min,
std::shared_ptr<Node> max);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false);
std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node>
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
std::shared_ptr<Node> min,
std::shared_ptr<Node> max);
}
}
......@@ -13,25 +13,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#pragma once
#include <limits>
#include <vector>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/util.hpp"
namespace ngraph
......@@ -40,117 +37,141 @@ namespace ngraph
{
namespace quantization_util
{
template <class T1, class T2, class T3>
void quantization_range_for_multiplication(
float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
std::shared_ptr<Node> max_abs(std::shared_ptr<Node> a, std::shared_ptr<Node> b)
{
// begin code copied and pasted (and modified) from
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
float a_one_quant_level = (max_a - min_a) / (std::numeric_limits<T1>::max() -
std::numeric_limits<T1>::min());
float b_one_quant_level = (max_b - min_b) / (std::numeric_limits<T2>::max() -
std::numeric_limits<T2>::min());
float c_one_quant_level = a_one_quant_level * b_one_quant_level;
*min_c = c_one_quant_level * std::numeric_limits<T3>::min();
*max_c = c_one_quant_level * std::numeric_limits<T3>::max();
// end code copied and pasted (and modified) from
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
auto abs_a = std::make_shared<op::Abs>(a);
auto abs_b = std::make_shared<op::Abs>(b);
return std::make_shared<op::Maximum>(abs_a, abs_b);
}
float get_scale(const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>
quantization_range_for_multiplication(std::shared_ptr<Node> min_a,
std::shared_ptr<Node> max_a,
std::shared_ptr<Node> min_b,
std::shared_ptr<Node> max_b)
{
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
auto min_filter_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_filter);
auto max_filter_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
auto input_min = min_input_const_op->get_vector<float>();
auto input_max = max_input_const_op->get_vector<float>();
auto filter_min = min_filter_const_op->get_vector<float>();
auto filter_max = max_filter_const_op->get_vector<float>();
auto output_min = min_freezed_output_const_op->get_vector<float>();
auto output_max = max_freezed_output_const_op->get_vector<float>();
float min_out_value;
float max_out_value;
quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(input_min[0],
input_max[0],
filter_min[0],
filter_max[0],
&min_out_value,
&max_out_value);
const float max_abs32 = std::max(std::abs(min_out_value), std::abs(max_out_value));
const float max_abs8 = std::max(std::abs(output_min[0]), std::abs(output_max[0]));
auto type = min_a->get_element_type();
if (type != max_a->get_element_type() || type != min_b->get_element_type() ||
type != max_b->get_element_type())
{
throw ngraph_error(
"quantization_range_for_multiplication: min and max must have same type");
}
auto shape = min_a->get_shape();
if (shape != max_a->get_shape() || shape != min_b->get_shape() ||
shape != max_b->get_shape())
{
throw ngraph_error(
"quantization_range_for_multiplication: min and max must have same shape");
}
auto u8_range = make_constant(type,
shape,
std::numeric_limits<uint8_t>::max() -
std::numeric_limits<uint8_t>::min());
auto i8_range = make_constant(type,
shape,
std::numeric_limits<int8_t>::max() -
std::numeric_limits<int8_t>::min());
auto a_one_quant_level = (max_a - min_a) / u8_range;
auto b_one_quant_level = (max_b - min_b) / i8_range;
auto c_one_quant_level = a_one_quant_level * b_one_quant_level;
auto i32_min = make_constant(type, shape, std::numeric_limits<int32_t>::min());
auto i32_max = make_constant(type, shape, std::numeric_limits<int32_t>::max());
auto min_c = c_one_quant_level * i32_min;
auto max_c = c_one_quant_level * i32_max;
return std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>(min_c, max_c);
}
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output)
{
auto type = min_input->get_element_type();
if (type != max_input->get_element_type() ||
type != min_filter->get_element_type() ||
type != max_filter->get_element_type() ||
type != min_freezed_output->get_element_type() ||
type != max_freezed_output->get_element_type())
{
throw ngraph_error("get_scale: min and max must have same type");
}
auto shape = min_input->get_shape();
if (shape != max_input->get_shape() || shape != min_filter->get_shape() ||
shape != max_filter->get_shape() || shape != min_freezed_output->get_shape() ||
shape != max_freezed_output->get_shape())
{
throw ngraph_error("get_scale: min and max must have same shape");
}
auto ranges = quantization_range_for_multiplication(
min_input, max_input, min_filter, max_filter);
auto min_out_value = ranges.first;
auto max_out_value = ranges.second;
auto max_abs32 = max_abs(min_out_value, max_out_value);
auto max_abs8 = max_abs(min_freezed_output, max_freezed_output);
// Output is signed int.
// s32 = f32 * std::pow(2, 31)/ max_abs32;
// s8 = f32 * std::pow(2, 7)/ max_abs8;
// s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
const float scale = static_cast<float>(
(std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8)));
return scale;
return make_constant(type, shape, std::pow(2, -24)) * (max_abs32 / max_abs8);
}
template <typename T>
static inline T get_quantization_scale(const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const ngraph::element::Type& type,
bool bump_by_eps = false)
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> input_min_range,
std::shared_ptr<Node> input_max_range,
const ngraph::element::Type& quant_type,
bool bump_by_eps = false)
{
auto min_input_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(min_input);
auto max_input_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(max_input);
if (min_input_const_op == nullptr)
auto type = input_min_range->get_element_type();
if (type != input_max_range->get_element_type())
{
throw ngraph_error("min input must be constant");
throw ngraph_error("get_scale: min and max must have same type");
}
else if (max_input_const_op == nullptr)
auto shape = input_min_range->get_shape();
if (shape != input_max_range->get_shape())
{
throw ngraph_error("max input must be constant");
throw ngraph_error("get_scale: min and max must have same shape");
}
auto input_min_range = min_input_const_op->get_vector<T>();
auto input_max_range = max_input_const_op->get_vector<T>();
auto min_range = input_min_range;
auto max_range = input_max_range;
T min_range = std::numeric_limits<T>::min();
T max_range = std::numeric_limits<T>::max();
if (bump_by_eps)
{
// If input_min_range and input_max_range are close,
// introduce a slightly larger delta between them.
min_range = std::min(static_cast<T>(0.0f), input_min_range[0]);
const T epsilon = std::max(static_cast<T>(1.0f),
static_cast<T>(std::max(fabs(input_min_range[0]),
fabs(input_max_range[0])))) /
static_cast<T>(100.0f);
max_range = std::max(input_max_range[0], min_range + epsilon);
max_range = std::max(static_cast<T>(0.0f), max_range);
// end code copied and pasted from
// github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantize_op.cc
}
else
{
min_range = input_min_range[0];
max_range = input_max_range[0];
auto zero = make_constant(type, shape, 0);
min_range = std::make_shared<op::Minimum>(zero, input_min_range);
auto max_abs_input_range = max_abs(input_min_range, input_max_range);
auto one = make_constant(type, shape, 1);
auto hundred = make_constant(type, shape, 100);
auto epsilon =
std::make_shared<op::Maximum>(one, max_abs_input_range) / hundred;
max_range = std::make_shared<op::Maximum>(input_max_range, min_range + epsilon);
max_range = std::make_shared<op::Maximum>(zero, max_range);
}
const T max_abs = std::max(std::abs(min_range), std::abs(max_range));
const T bitwidth = type.bitwidth();
const T target_range = static_cast<T>(
(type.is_signed() ? std::pow(2, (bitwidth - 1)) : std::pow(2, bitwidth)) - 1);
const T scale_factor = max_abs / target_range;
return scale_factor;
size_t bw = quant_type.bitwidth();
float range = static_cast<float>(
(quant_type.is_signed() ? std::pow(2, (bw - 1)) : std::pow(2, bw)) - 1);
auto max_abs_range = max_abs(min_range, max_range);
auto target_range = make_constant(type, shape, range);
return max_abs_range / target_range;
}
}
}
......
......@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
set_output_type(0,
element::i8,
util::infer_convolution_output_shape(this,
......
......@@ -41,7 +41,6 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_scale() const { return m_scale; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -51,7 +50,6 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_scale;
};
}
}
......@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......
......@@ -47,7 +47,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
......@@ -62,7 +61,6 @@ namespace ngraph
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
float m_scale;
};
}
}
......@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
set_output_type(0,
element::u8,
util::infer_convolution_output_shape(this,
......
......@@ -43,7 +43,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node>
......@@ -55,7 +54,6 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_scale;
};
}
}
......@@ -30,6 +30,7 @@
#include "ngraph/op/negative.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/pattern/matcher.hpp"
......@@ -45,6 +46,7 @@
#include "ngraph/runtime/reference/negate.hpp"
#include "ngraph/runtime/reference/pad.hpp"
#include "ngraph/runtime/reference/quantize.hpp"
#include "ngraph/runtime/reference/relu.hpp"
#include "ngraph/runtime/reference/reshape.hpp"
#include "ngraph/runtime/reference/subtract.hpp"
......@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary()
bool is_supported_unary_op(std::shared_ptr<Node> n)
{
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n);
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n) ||
std::dynamic_pointer_cast<op::Relu>(n);
}
template <class T>
......@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant,
runtime::reference::negate<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else if (std::dynamic_pointer_cast<op::Relu>(unary))
{
runtime::reference::relu<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else
{
NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op";
......
......@@ -118,6 +118,7 @@
#include "ngraph/op/topk.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/common_function_collection.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
#include "ngraph/pass/dump_sorted.hpp"
......@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<ngraph::pass::ConstantFolding>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(
runtime::cpu::get_cse_handlers_map());
......
......@@ -25,6 +25,7 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
......@@ -226,6 +227,16 @@ namespace ngraph
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{
auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolution scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -234,12 +245,21 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolution*>(node))
->get_scale(),
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{
auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcr->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -248,12 +268,21 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node))
->get_scale(),
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{
auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcb->get_arguments()[3]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
// conv+bias = cvt_to_int8(scale*(dst + bias))
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution(
......@@ -265,8 +294,7 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
->get_scale(),
scale_val[0],
ops);
}
else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment