Commit f30910c6 authored by Nishant Patel, committed by Robert Kimball

Switch to scale and offset design from min and max for Quantization (#1789)

* Switch to scale and offset design from min and max for Quantization

* Remove offset and make the quantize ops single-output ops

* move cpu QuantOps to core and create builders

* rebase to HEAD

* remove convbias and convbiasrelu ctors which take conv

* remove mistakenly added quantize.rst

* remove offset

* Compute scale, move quantization ops to experimental dir and some PR feedback

* Normalize license headers
parent bcfbf099
......@@ -20,6 +20,7 @@ set (SRC
autodiff/adjoints.cpp
builder/autobroadcast.cpp
builder/numpy_transpose.cpp
builder/quantization.cpp
builder/reduce_ops.cpp
coordinate.cpp
coordinate_diff.cpp
......@@ -84,6 +85,11 @@ set (SRC
op/power.cpp
op/product.cpp
op/quantize.cpp
op/experimental/quantized_avg_pool.cpp
op/experimental/quantized_conv_bias.cpp
op/experimental/quantized_conv_relu.cpp
op/experimental/quantized_conv.cpp
op/experimental/quantized_max_pool.cpp
op/reduce.cpp
op/reduce_window.cpp
op/relu.cpp
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
namespace ngraph
{
namespace builder
{
// Builder helpers that construct quantized ops from real-value range
// information. Each builder takes min/max nodes describing float ranges and
// derives the op's quantization scale from them; the derivation lives in
// builder/quantization.cpp, not in this header.
// NOTE(review): the min/max parameters appear to be expected to be
// op::Constant nodes each holding a single float — confirm against the
// builder definitions before relying on this.

// Build a quantized average-pool over `arg`; `min`/`max` describe the float
// range of the quantized input tensor.
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
// Build a quantized convolution with fused bias, optionally with fused ReLU
// (`with_relu`). The three min/max pairs give the float ranges of the input,
// the filter, and the frozen (pre-computed) output respectively.
std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false);
// Build a quantized convolution with fused ReLU. Range parameters are as in
// ScaledQuantizedConvolutionBias.
std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
// Build a plain quantized convolution. Range parameters are as in
// ScaledQuantizedConvolutionBias.
std::shared_ptr<Node>
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
// Build a quantized max-pool over `arg`; `min`/`max` describe the float range
// of the quantized input tensor.
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
} // namespace builder
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#pragma once

#include <algorithm> // std::max
#include <cmath>     // std::abs(float), std::pow
#include <limits>
#include <vector>

#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/util.hpp"

namespace ngraph
{
    namespace builder
    {
        namespace quantization_util
        {
            /// \brief Compute the real-value range of a quantized multiplication's result.
            ///
            /// Given float ranges [min_a, max_a] and [min_b, max_b] of two tensors
            /// quantized as T1 and T2 respectively, writes into *min_c / *max_c the
            /// float range representable when the product is accumulated into the
            /// (wider) quantized type T3.
            ///
            /// \param min_a, max_a  float range of the first (T1-quantized) operand
            /// \param min_b, max_b  float range of the second (T2-quantized) operand
            /// \param min_c, max_c  out-params; must be non-null
            template <class T1, class T2, class T3>
            void quantization_range_for_multiplication(
                float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
            {
                // begin code copied and pasted (and modified) from
                // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
                // The float value of one quantization step of each input type.
                float a_one_quant_level = (max_a - min_a) / (std::numeric_limits<T1>::max() -
                                                             std::numeric_limits<T1>::min());
                float b_one_quant_level = (max_b - min_b) / (std::numeric_limits<T2>::max() -
                                                             std::numeric_limits<T2>::min());
                float c_one_quant_level = a_one_quant_level * b_one_quant_level;
                *min_c = c_one_quant_level * std::numeric_limits<T3>::min();
                *max_c = c_one_quant_level * std::numeric_limits<T3>::max();
                // end code copied and pasted (and modified) from
                // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
            }

            /// \brief Compute the requantization scale for a u8 (input) x i8 (filter)
            ///        -> i32 (accumulator) quantized convolution whose output is
            ///        requantized to a signed 8-bit range.
            ///
            /// All six arguments must be op::Constant nodes holding a single float;
            /// they are static_cast to op::Constant without checking, so passing any
            /// other node type is undefined behavior.
            ///
            /// Marked `inline` because this function is *defined* in a header: without
            /// `inline`, every translation unit that includes this header would emit
            /// its own external definition and linking would fail with
            /// multiple-definition (ODR) errors.
            inline float get_scale(const std::shared_ptr<Node> min_input,
                                   const std::shared_ptr<Node> max_input,
                                   const std::shared_ptr<Node> min_filter,
                                   const std::shared_ptr<Node> max_filter,
                                   const std::shared_ptr<Node> min_freezed_output,
                                   const std::shared_ptr<Node> max_freezed_output)
            {
                auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
                auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
                auto min_filter_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(min_filter);
                auto max_filter_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(max_filter);
                auto min_freezed_output_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
                auto max_freezed_output_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
                float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
                float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
                float filter_min =
                    *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
                float filter_max =
                    *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
                float output_min =
                    *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
                float output_max =
                    *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
                float min_out_value;
                float max_out_value;
                quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(
                    input_min, input_max, filter_min, filter_max, &min_out_value, &max_out_value);

                const float max_abs32 = std::max(std::abs(min_out_value), std::abs(max_out_value));
                const float max_abs8 = std::max(std::abs(output_min), std::abs(output_max));
                // Output is signed int.
                // s32 = f32 * std::pow(2, 31)/ max_abs32;
                // s8 = f32 * std::pow(2, 7)/ max_abs8;
                // s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
                const float scale = static_cast<float>(
                    (std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8)));
                return scale;
            }
        } // namespace quantization_util
    }     // namespace builder
} // namespace ngraph
......@@ -26,10 +26,8 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const shared_ptr<Node> min,
const shared_ptr<Node> max)
: Op("QuantizedAvgPool", check_single_output_args({arg, min, max}))
bool include_padding_in_avg_computation)
: Op("QuantizedAvgPool", check_single_output_args({arg}))
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
......@@ -40,18 +38,7 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
{
throw ngraph_error("Dequantization supported only for i8/u8!");
}
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
throw ngraph_error("QuantizedAvgPool supported only for i8/u8!");
}
}
......@@ -211,11 +198,7 @@ void op::QuantizedAvgPool::validate_and_infer_types()
result_shape[1] = channel_count;
copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_output_size(3);
set_output_type(0, get_input_element_type(0), result_shape);
//TODO(nbpatel): Change to Shape{} once the mkldnn version is updated
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_args) const
......@@ -226,7 +209,5 @@ shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_
m_window_movement_strides,
m_padding_below,
m_padding_above,
m_include_padding_in_avg_computation,
new_args.at(1),
new_args.at(2));
m_include_padding_in_avg_computation);
}
......@@ -48,9 +48,7 @@ namespace ngraph
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
bool include_padding_in_avg_computation);
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -30,21 +30,8 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
: Op("QuantizedConvolution",
check_single_output_args({data_batch,
filters,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
const std::shared_ptr<Node> scale)
: Op("QuantizedConvolution", check_single_output_args({data_batch, filters, scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -58,29 +45,11 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter);
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_input_min = input_min;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
this->m_scale = scale_val;
set_output_size(3);
set_output_type(0,
element::i8,
util::infer_convolution_output_shape(this,
......@@ -98,12 +67,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
0, /* batch_axis_result, */
1 /* output_channel_axis_result, */
));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 8)
if (new_args.size() != 3)
{
throw ngraph_error("Incorrect number of new arguments");
}
......@@ -114,10 +81,5 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector&
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
new_args.at(2),
new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7)));
new_args.at(2)));
}
......@@ -33,12 +33,7 @@ namespace ngraph
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
const std::shared_ptr<Node> scale);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
......@@ -46,12 +41,7 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_input_min() const { return m_input_min; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
float get_scale() const { return m_scale; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -61,12 +51,7 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_input_min;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
float m_scale;
};
}
}
......@@ -16,84 +16,27 @@
#include <numeric>
#include "conv_bias.hpp"
#include "quantized_conv_bias.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
op::QuantizedConvolutionBias::QuantizedConvolutionBias(
const shared_ptr<op::QuantizedConvolution>& qconv,
const shared_ptr<Node>& bias,
const bool with_relu)
: Op("QuantizedConvolutionBias",
check_single_output_args({qconv->get_argument(0),
qconv->get_argument(1),
bias,
qconv->get_argument(2),
qconv->get_argument(3),
qconv->get_argument(4),
qconv->get_argument(5),
qconv->get_argument(6),
qconv->get_argument(7)}))
, m_window_movement_strides(qconv->get_window_movement_strides())
, m_window_dilation_strides(qconv->get_window_dilation_strides())
, m_padding_below(qconv->get_padding_below())
, m_padding_above(qconv->get_padding_above())
, m_data_dilation_strides(qconv->get_data_dilation_strides())
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
this->m_input_min = qconv->get_input_min();
this->m_input_max = qconv->get_input_max();
this->m_filter_min = qconv->get_filter_min();
this->m_filter_max = qconv->get_filter_max();
this->m_freezed_output_min = qconv->get_freezed_output_min();
this->m_freezed_output_max = qconv->get_freezed_output_max();
util::validate_convbias_shapes(qconv->get_argument(0)->get_shape(),
qconv->get_argument(1)->get_shape(),
bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8;
set_output_size(3);
set_output_type(0, output_et, qconv->get_shape());
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
op::QuantizedConvolutionBias::QuantizedConvolutionBias(
const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
const bool with_relu)
: Op("QuantizedConvolutionBias",
check_single_output_args({data_batch,
filters,
bias,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale,
const bool with_relu)
: Op("QuantizedConvolutionBias", check_single_output_args({data_batch, filters, bias, scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -106,31 +49,14 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter);
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
this->m_input_min = input_min;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8;
set_output_size(3);
set_output_type(0,
output_et,
util::infer_convolution_output_shape(this,
......@@ -148,13 +74,11 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(
0, /* batch_axis_result, */
1 /* output_channel_axis_result, */
));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 9)
if (new_args.size() != 4)
{
throw ngraph_error("Incorrect number of new arguments");
}
......@@ -168,10 +92,5 @@ shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVect
get_padding_above(),
get_data_dilation_strides(),
new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7),
new_args.at(8),
m_with_relu));
}
......@@ -16,8 +16,8 @@
#pragma once
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph
{
......@@ -39,12 +39,7 @@ namespace ngraph
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
const std::shared_ptr<Node> scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
......@@ -52,12 +47,7 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_input_min() const { return m_input_min; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
......@@ -72,12 +62,7 @@ namespace ngraph
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
float m_input_min;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
float m_scale;
};
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <numeric>
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
const std::shared_ptr<op::QuantizedConvolution>& qconv)
: Op("QuantizedConvolutionRelu",
check_single_output_args({qconv->get_argument(0),
qconv->get_argument(1),
qconv->get_argument(2),
qconv->get_argument(3),
qconv->get_argument(4),
qconv->get_argument(5),
qconv->get_argument(6),
qconv->get_argument(7)}))
, m_window_movement_strides(qconv->get_window_movement_strides())
, m_window_dilation_strides(qconv->get_window_dilation_strides())
, m_padding_below(qconv->get_padding_below())
, m_padding_above(qconv->get_padding_above())
, m_data_dilation_strides(qconv->get_data_dilation_strides())
{
constructor_validate_and_infer_types();
this->m_input_min = qconv->get_input_min();
this->m_input_max = qconv->get_input_max();
this->m_filter_min = qconv->get_filter_min();
this->m_filter_max = qconv->get_filter_max();
this->m_freezed_output_min = qconv->get_freezed_output_min();
this->m_freezed_output_max = qconv->get_freezed_output_max();
set_output_size(3);
set_output_type(0, element::u8, qconv->get_shape());
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
: Op("QuantizedConvolutionRelu",
check_single_output_args({data_batch,
filters,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale)
: Op("QuantizedConvolutionRelu", check_single_output_args({data_batch, filters, scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -90,28 +44,10 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter);
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
this->m_input_min = input_min;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
set_output_size(3);
set_output_type(0,
element::u8,
util::infer_convolution_output_shape(this,
......@@ -129,15 +65,12 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
0, /* batch_axis_result, */
1 /* output_channel_axis_result, */
));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
std::shared_ptr<Node>
op::QuantizedConvolutionRelu::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 8)
if (new_args.size() != 3)
{
throw ngraph_error("Incorrect number of new arguments");
}
......@@ -149,10 +82,5 @@ std::shared_ptr<Node>
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
new_args.at(2),
new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7)));
new_args.at(2)));
}
......@@ -16,9 +16,8 @@
#pragma once
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph
{
......@@ -37,24 +36,14 @@ namespace ngraph
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
const std::shared_ptr<Node> scale);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_input_min() const { return m_input_min; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node>
......@@ -66,12 +55,7 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_input_min;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
float m_scale;
};
}
}
......@@ -26,10 +26,8 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const shared_ptr<Node> min,
const shared_ptr<Node> max)
: Op("QuantizedMaxPool", check_single_output_args({arg, min, max}))
const Shape& padding_above)
: Op("QuantizedMaxPool", check_single_output_args({arg}))
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
......@@ -39,18 +37,7 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
{
throw ngraph_error("Dequantization supported only for i8/u8!");
}
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
throw ngraph_error("QuantizedMaxPool supported only for i8/u8!");
}
}
......@@ -171,11 +158,7 @@ void op::QuantizedMaxPool::validate_and_infer_types()
result_shape[1] = channel_count;
copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_output_size(3);
set_output_type(0, get_input_element_type(0), result_shape);
//TODO(nbpatel): Change to Shape{} once the mkldnn version is updated.
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_args) const
......@@ -185,7 +168,5 @@ shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_
m_window_shape,
m_window_movement_strides,
m_padding_below,
m_padding_above,
new_args.at(1),
new_args.at(2));
m_padding_above);
}
......@@ -37,9 +37,7 @@ namespace ngraph
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
const Shape& padding_above);
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -40,8 +40,6 @@ set(SRC
builder/convert_layout.cpp
builder/quantized_conv.cpp
builder/convolution.cpp
builder/dequantize.cpp
builder/quantize.cpp
builder/dot.cpp
builder/function_call.cpp
builder/lstm.cpp
......@@ -85,18 +83,11 @@ set(SRC
op/group_conv.cpp
op/conv_bias.cpp
op/conv_relu.cpp
op/quantized_conv.cpp
op/convert_layout.cpp
op/dequantize.cpp
op/quantize.cpp
op/loop_kernel.cpp
op/lstm.cpp
op/matmul_bias.cpp
op/max_pool_with_indices.cpp
op/quantized_max_pool.cpp
op/quantized_avg_pool.cpp
op/quantized_conv_relu.cpp
op/quantized_conv_bias.cpp
op/rnn.cpp
op/sigmoid_mul.cpp
op/conv_add.cpp
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include <vector>
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // DEX builder for DequantizeCPU: emits a functor that runs an
            // MKLDNN scaled reorder converting the quantized (i8/u8) input
            // tensor to the f32 output tensor. Throws if MKLDNN cannot be
            // used for this node.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::DequantizeCPU)
            {
                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    auto& functors = external_function->get_functors();
                    // Tensor-data slots are taken by reference so the functor
                    // below always sees the pointers current at execution time.
                    auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
                    auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
                    // build_dequantization derives the scale from the node's
                    // constant min/max inputs and creates the reorder primitive.
                    size_t dequantize_index =
                        mkldnn_emitter->build_dequantization(node, input_desc, result_desc);
                    // deps[0]/deps[1] are the input/output memory primitives.
                    auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);
                    auto functor = [&, dequantize_index](CPURuntimeContext* ctx) {
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
                        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
                    };
                    functors.emplace_back(functor);
                }
                else
                {
                    throw ngraph_error("unsupported parameters for DequantizeCPUOp via DEX");
                }
            }
            REGISTER_OP_BUILDER(DequantizeCPU);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <vector>
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/quantization_util.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // DEX builder for QuantizeCPU: emits a functor that quantizes the
            // f32 input tensor to i8/u8 via an MKLDNN scaled reorder, and also
            // writes the computed min/max of the represented range into the
            // op's two auxiliary f32 scalar outputs.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::QuantizeCPU)
            {
                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    auto quantize = static_cast<const ngraph::op::QuantizeCPU*>(node);
                    auto& functors = external_function->get_functors();
                    auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
                    // Output 0 is the quantized tensor; outputs 1 and 2 receive
                    // the min/max range scalars.
                    auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
                    auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
                    auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
                    vector<float> quant_util; // min_range, max_range & scale.
                    quantization_util::get_min_max_range(quantize->get_input_min(),
                                                         quantize->get_input_max(),
                                                         (quantize->get_quantize_et()).is_signed(),
                                                         quant_util);
                    // Only the scale (quant_util[2]) feeds the reorder; the
                    // min/max are emitted at run time below.
                    std::vector<float> scales;
                    scales.push_back(quant_util[2]);
                    size_t quantize_index =
                        mkldnn_emitter->build_quantize_reorder(input_desc, result_desc, scales);
                    auto& deps = mkldnn_emitter->get_primitive_deps(quantize_index);
                    // quant_util is captured by value: it is a local that dies
                    // when this builder returns.
                    auto functor = [&, quantize_index, quant_util](CPURuntimeContext* ctx) {
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
                        *(static_cast<float*>(out1_tensor)) = quant_util[0];
                        *(static_cast<float*>(out2_tensor)) = quant_util[1];
                        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
                    };
                    functors.emplace_back(functor);
                }
                else
                {
                    throw ngraph_error("Unsupported parameters for QuantizeCPUOp via DEX");
                }
            }
            REGISTER_OP_BUILDER(QuantizeCPU);
        }
    }
}
......@@ -14,7 +14,7 @@
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
......@@ -37,20 +37,15 @@ namespace ngraph
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_avg_pool(node, quant_util);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
auto functor = [&, quant_util](CPURuntimeContext* ctx) {
auto functor = [&, qavg_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0];
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index);
};
functors.emplace_back(functor);
}
......
......@@ -14,13 +14,13 @@
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
using namespace std;
using namespace ngraph;
......@@ -36,13 +36,10 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto qconvolution = static_cast<const ngraph::op::QuantizedConvolution*>(node);
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
......@@ -50,16 +47,11 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolution>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution->get_freezed_output_min();
float max_freezed_output = qconvolution->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output](
CPURuntimeContext* ctx) {
auto functor = [&, conv_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
......@@ -75,14 +67,10 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto qconvolution_relu =
static_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
......@@ -90,16 +78,11 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution_relu->get_freezed_output_min();
float max_freezed_output = qconvolution_relu->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output](
CPURuntimeContext* ctx) {
auto functor = [&, conv_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
......@@ -116,15 +99,11 @@ namespace ngraph
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto qconvolution_bias =
static_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
......@@ -132,17 +111,12 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBias>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution_bias->get_freezed_output_min();
float max_freezed_output = qconvolution_bias->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output](
CPURuntimeContext* ctx) {
auto functor = [&, conv_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
......
......@@ -14,7 +14,7 @@
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
......@@ -37,21 +37,16 @@ namespace ngraph
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_max_pool(node, quant_util);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node);
auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
auto functor = [&, quant_util](CPURuntimeContext* ctx) {
auto functor = [&, qmax_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0];
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index);
};
functors.emplace_back(functor);
}
......
This diff is collapsed.
......@@ -70,6 +70,11 @@
#include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/function_call.hpp"
#include "ngraph/op/get_output_element.hpp"
......@@ -146,18 +151,11 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
......@@ -304,8 +302,6 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::emit<op::Ceiling>},
{TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::emit<op::Sqrt>},
{TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::emit<op::Convolution>},
{TI(ngraph::op::QuantizeCPU), &runtime::cpu::CPU_Emitter::emit<op::QuantizeCPU>},
{TI(ngraph::op::DequantizeCPU), &runtime::cpu::CPU_Emitter::emit<op::DequantizeCPU>},
{TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData),
......
......@@ -20,13 +20,12 @@
#include "mkldnn_emitter.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/type/element_type.hpp"
using namespace ngraph::runtime::cpu;
......@@ -123,31 +122,7 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
return index;
}
// Builds an MKLDNN scaled-reorder primitive that dequantizes an i8/u8 tensor
// to f32. The scale is derived from the DequantizeCPU node's constant min/max
// inputs: scale = max(|min|, |max|) / target_range, where target_range is 127
// for signed and 255 for unsigned quantized types.
// Returns the primitive index of the underlying quantize-reorder.
size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node,
                                           const mkldnn::memory::desc& input_desc,
                                           const mkldnn::memory::desc& result_desc)
{
    auto dequantize = static_cast<const ngraph::op::DequantizeCPU*>(node);
    // Arguments 1 and 2 are validated to be Constants by the op's constructor.
    auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(1));
    auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(2));
    float min_range = *(static_cast<float const*>(min_const_op->get_data_ptr()));
    float max_range = *(static_cast<float const*>(max_const_op->get_data_ptr()));
    const float max_abs = std::max(std::abs(min_range), std::abs(max_range));
    const bool is_signed = (dequantize->get_dequantize_et()).is_signed();
    // 2^7 - 1 = 127 for i8, 2^8 - 1 = 255 for u8; spelled as constants rather
    // than the original's runtime std::pow calls.
    const float target_range = is_signed ? 127.0f : 255.0f;
    const float scale_factor = max_abs / target_range;
    std::vector<float> scales;
    scales.push_back(scale_factor);
    // Dequantization is just a scaled reorder from the quantized type to f32.
    return this->build_quantize_reorder(input_desc, result_desc, scales);
}
void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util)
size_t MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node)
{
auto qmax_pool = static_cast<const ngraph::op::QuantizedMaxPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
......@@ -159,17 +134,10 @@ void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node,
qmax_pool->get_window_shape(),
qmax_pool->get_padding_below(),
qmax_pool->get_padding_above());
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(1));
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qmax_pool_index);
return qmax_pool_index;
}
void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node,
std::vector<float>& quant_util)
size_t MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node)
{
auto qavg_pool = static_cast<const ngraph::op::QuantizedAvgPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
......@@ -184,13 +152,7 @@ void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node,
qavg_pool->get_window_shape(),
qavg_pool->get_padding_below(),
qavg_pool->get_padding_above());
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(1));
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qavg_pool_index);
return qavg_pool_index;
}
mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format(
......@@ -787,28 +749,6 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
return primitive_index;
}
// Creates an MKLDNN reorder primitive that applies the given output scales
// while converting between the input and result memory descriptors; this is
// the common engine for both quantization and dequantization.
// Returns the index of the new primitive; its deps are {input, result}.
size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
                                             const mkldnn::memory::desc& result_desc,
                                             const std::vector<float>& scales)
{
    size_t input_index = build_memory_primitive(input_desc);
    size_t result_index = build_memory_primitive(result_desc);
    mkldnn::primitive_attr attr;
    // Mask 0 => one common scale applied to the whole tensor.
    attr.set_output_scales(0, scales);
    attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
    auto reorder_desc =
        mkldnn::reorder::primitive_desc({input_desc, mkldnn_utils::global_cpu_engine},
                                        {result_desc, mkldnn_utils::global_cpu_engine},
                                        attr);
    size_t primitive_index = insert_primitive(new mkldnn::reorder(
        reorder_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
    // Record the memory primitives so the executor can bind data pointers.
    m_primitive_deps[primitive_index] = {input_index, result_index};
    return primitive_index;
}
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
float alpha,
......
......@@ -26,16 +26,15 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/quantization_util.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -227,8 +226,6 @@ namespace ngraph
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolution>(node);
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -237,14 +234,12 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale,
(dynamic_cast<const ngraph::op::QuantizedConvolution*>(node))
->get_scale(),
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolutionRelu>(
node);
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -253,15 +248,13 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale,
(dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node))
->get_scale(),
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{
// conv+bias = cvt_to_int8(scale*(dst + bias))
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolutionBias>(
node);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution(
data_desc,
......@@ -272,7 +265,8 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale,
(dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
->get_scale(),
ops);
}
else
......@@ -595,19 +589,9 @@ namespace ngraph
const mkldnn::memory::desc& result_desc,
float alpha);
size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales);
size_t build_quantized_max_pool(const ngraph::Node* node);
size_t build_dequantization(const ngraph::Node* node,
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
void build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
void build_quantized_avg_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
size_t build_quantized_avg_pool(const ngraph::Node* node);
private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/op/constant.hpp"
// CPU-specific dequantize op: converts an i8/u8 `input` tensor to f32 using
// the real-valued range given by the constant scalar `min`/`max` inputs.
// `type` records the quantized element type being decoded.
// Throws ngraph_error when any input violates the contract below.
ngraph::op::DequantizeCPU::DequantizeCPU(std::shared_ptr<Node> input,
                                         std::shared_ptr<Node> min,
                                         std::shared_ptr<Node> max,
                                         const element::Type& type)
    : Op("DequantizeCPU", check_single_output_args({input, min, max}))
    , m_element_type(type)
{
    constructor_validate_and_infer_types();
    if (input->get_element_type() != element::u8 && input->get_element_type() != element::i8)
    {
        throw ngraph_error("Dequantization supported only for i8/u8!");
    }
    // BUG FIX: the original compared min's element type to itself, so this
    // check could never fire. Compare min against max instead.
    if (min->get_element_type() != max->get_element_type())
    {
        throw ngraph_error("Min's element type isn't equal to max's!");
    }
    if (min->get_shape().size() != 0)
    {
        throw ngraph_error("Min is not a scalar!");
    }
    if (max->get_shape().size() != 0)
    {
        throw ngraph_error("Max is not a scalar!");
    }
    if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
          std::dynamic_pointer_cast<op::Constant>(max)))
    {
        throw ngraph_error("Min and max have to be constants!");
    }
    // Single output: the dequantized f32 tensor, same shape as the input.
    set_output_type(0, element::f32, input->get_shape());
}
// Clones this op, rewired onto the three replacement inputs
// (data, min, max); the recorded element type is carried over.
std::shared_ptr<ngraph::Node>
    ngraph::op::DequantizeCPU::copy_with_new_args(const NodeVector& new_args) const
{
    constexpr size_t expected_arg_count = 3;
    if (new_args.size() != expected_arg_count)
    {
        throw ngraph_error("Incorrect number of new arguments");
    }
    const auto& data = new_args.at(0);
    const auto& min = new_args.at(1);
    const auto& max = new_args.at(2);
    return std::make_shared<DequantizeCPU>(data, min, max, m_element_type);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
    namespace op
    {
        // CPU-specific dequantize op: turns a quantized tensor back into a
        // real-valued one. Inputs are (data, min, max); see the constructor
        // definition for the exact validity checks it enforces.
        class DequantizeCPU : public Op
        {
        public:
            // \param input quantized data tensor
            // \param min   scalar constant: lower bound of the real range
            // \param max   scalar constant: upper bound of the real range
            // \param type  element type the data was quantized to
            DequantizeCPU(std::shared_ptr<Node> input,
                          std::shared_ptr<Node> min,
                          std::shared_ptr<Node> max,
                          const element::Type& type);

            // Element type the data was quantized to, as passed at construction.
            const element::Type& get_dequantize_et() const { return m_element_type; }
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

        private:
            const element::Type m_element_type;
        };
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/op/constant.hpp"
// Constructs a QuantizeCPU op: quantizes a float32 tensor to `type` (i8/u8)
// using the scalar constant range [min, max].
// Outputs: 0 = quantized tensor, 1 = min (f32 scalar), 2 = max (f32 scalar).
// Throws ngraph_error when the inputs violate the contract below.
ngraph::op::QuantizeCPU::QuantizeCPU(std::shared_ptr<Node> input,
std::shared_ptr<Node> min,
std::shared_ptr<Node> max,
const element::Type& type)
: Op("QuantizeCPU", check_single_output_args({input, min, max}))
, m_element_type(type)
{
constructor_validate_and_infer_types();
// Only f32 -> 8-bit quantization is supported.
if (input->get_element_type() != element::f32)
{
throw ngraph_error("Quantization supported only from float32 --> i8/u8!");
}
// BUGFIX: original compared min's element type with itself
// (min != min, always false), so mismatched min/max types were
// never rejected. Compare min against max as the message intends.
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
// Range bounds must be scalars (rank 0).
if (min->get_shape().size() != 0)
{
throw ngraph_error("Min is not a scalar!");
}
if (max->get_shape().size() != 0)
{
throw ngraph_error("Max is not a scalar!");
}
// The range must be known at graph-construction time.
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
}
// Cache the constant range values for use by the backend kernels.
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(min);
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(max);
float input_min_range = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float input_max_range = *(static_cast<float const*>(max_const_op->get_data_ptr()));
this->m_input_min = input_min_range;
this->m_input_max = input_max_range;
// Quantized data plus the (possibly adjusted) f32 range scalars.
set_output_size(3);
set_output_type(0, type, input->get_shape());
set_output_type(1, element::f32, Shape{});
set_output_type(2, element::f32, Shape{});
}
// Clones this op over a fresh set of arguments, preserving the target
// quantization element type.
std::shared_ptr<ngraph::Node>
ngraph::op::QuantizeCPU::copy_with_new_args(const NodeVector& new_args) const
{
// QuantizeCPU always takes exactly three inputs: data, min, max.
if (new_args.size() != 3)
{
throw ngraph_error("Incorrect number of new arguments");
}
auto& data = new_args.at(0);
auto& range_min = new_args.at(1);
auto& range_max = new_args.at(2);
return std::make_shared<QuantizeCPU>(data, range_min, range_max, m_element_type);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
// CPU-backend op quantizing a float32 tensor to an 8-bit type using a
// constant scalar [min, max] range. Produces three outputs: the quantized
// tensor plus the f32 min and max scalars (see the constructor definition).
class QuantizeCPU : public Op
{
public:
// \param input f32 tensor to quantize
// \param min   scalar constant: minimum of the real-valued range
// \param max   scalar constant: maximum of the real-valued range
// \param type  target quantized element type (i8/u8)
QuantizeCPU(std::shared_ptr<Node> input,
std::shared_ptr<Node> min,
std::shared_ptr<Node> max,
const element::Type& type);
// Target element type recorded at construction.
const element::Type& get_quantize_et() const { return m_element_type; }
// Constant range values cached from the min/max inputs.
float get_input_min() const { return m_input_min; }
float get_input_max() const { return m_input_max; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
private:
const element::Type m_element_type;
float m_input_min;
float m_input_max;
};
}
}
......@@ -30,6 +30,11 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/relu.hpp"
......@@ -43,16 +48,9 @@
#include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
......@@ -733,27 +731,14 @@ namespace ngraph
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::DequantizeCPU)
{
// Route dequantization through MKLDNN only for 8-bit (u8/i8) inputs;
// other element types fall back to the default implementation.
if (node->get_input_element_type(0) == element::u8 ||
node->get_input_element_type(0) == element::i8)
{
auto dequantize = static_cast<op::DequantizeCPU*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
dequantize->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolution)
{
auto quantized_conv = static_cast<op::QuantizedConvolution*>(node);
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8)
{
auto quantized_conv = static_cast<op::QuantizedConvolution*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
......@@ -764,10 +749,11 @@ namespace ngraph
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionRelu)
{
auto quantized_conv_relu = static_cast<op::QuantizedConvolutionRelu*>(node);
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8)
{
auto quantized_conv_relu = static_cast<op::QuantizedConvolutionRelu*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
......@@ -775,26 +761,14 @@ namespace ngraph
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizeCPU)
{
// Route quantization through MKLDNN only when the input is f32
// (the only source type QuantizeCPU supports).
if (node->get_input_element_type(0) == element::f32)
{
auto quantize = static_cast<op::QuantizeCPU*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
quantize->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBias)
{
auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBias*>(node);
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8)
{
auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBias*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
......@@ -861,14 +835,10 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedAvgPool>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>},
{TI(ngraph::op::Slice), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Slice>},
{TI(ngraph::op::QuantizeCPU),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizeCPU>},
{TI(ngraph::op::ReplaceSlice),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::ConvolutionAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionAdd>},
{TI(ngraph::op::DequantizeCPU),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::DequantizeCPU>},
{TI(ngraph::op::QuantizedConvolutionRelu),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias),
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#pragma once
#include <limits>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace quantization_util
{
// Computes the real-valued output range [*min_c, *max_c] of multiplying a
// T1-quantized value in [min_a, max_a] by a T2-quantized value in
// [min_b, max_b], accumulated into T3.
// Adapted from TensorFlow's quantization_utils.h.
template <class T1, class T2, class T3>
void quantization_range_for_multiplication(
    float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
{
    // Real value represented by one quantization step of each input.
    const float a_step = (max_a - min_a) /
                         (std::numeric_limits<T1>::max() - std::numeric_limits<T1>::min());
    const float b_step = (max_b - min_b) /
                         (std::numeric_limits<T2>::max() - std::numeric_limits<T2>::min());
    // One output step spans the product of one step from each input.
    const float c_step = a_step * b_step;
    *min_c = c_step * std::numeric_limits<T3>::min();
    *max_c = c_step * std::numeric_limits<T3>::max();
}
// Derives a usable quantization range and scale from a requested
// [input_min_range, input_max_range]. Appends three values to quant_util:
// adjusted min, adjusted max, and the quantization scale.
// Range-sanitizing logic adapted from TensorFlow's quantize_op.cc.
static inline void get_min_max_range(float input_min_range,
                                     float input_max_range,
                                     bool is_signed,
                                     std::vector<float>& quant_util)
{
    // The range must contain zero; widen degenerate ranges slightly so the
    // scale below is never a division by zero.
    const float lo = std::min(0.0f, input_min_range);
    const float epsilon =
        std::max(1.0f, std::max(fabsf(input_min_range), fabsf(input_max_range))) / 100.0f;
    const float hi = std::max(0.0f, std::max(input_max_range, lo + epsilon));
    // Symmetrize around zero: signed types use [-max_abs, max_abs],
    // unsigned types use [0, max_abs].
    const float max_abs = std::max(std::abs(lo), std::abs(hi));
    // Number of representable positive levels: 2^7-1 (i8) or 2^8-1 (u8).
    const float target_range =
        static_cast<float>((is_signed ? std::pow(2, 7) : std::pow(2, 8)) - 1);
    quant_util.push_back(is_signed ? -max_abs : 0); // adjusted min
    quant_util.push_back(max_abs);                  // adjusted max
    quant_util.push_back(target_range / max_abs);   // scale
}
// Computes the requantization scale converting the op's s32 accumulator
// output to its frozen s8 output range.
// OP must expose get_input_min/max, get_filter_min/max and
// get_freezed_output_min/max (the quantized convolution ops).
template <typename OP>
float get_scale(const ngraph::Node* node)
{
    auto qconv = static_cast<const OP*>(node);
    // Real-valued range implied for the 32-bit accumulator by the
    // input and filter quantization ranges.
    float accum_min;
    float accum_max;
    quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(
        qconv->get_input_min(),
        qconv->get_input_max(),
        qconv->get_filter_min(),
        qconv->get_filter_max(),
        &accum_min,
        &accum_max);
    const float max_abs32 = std::max(std::abs(accum_min), std::abs(accum_max));
    const float max_abs8 = std::max(std::abs(qconv->get_freezed_output_min()),
                                    std::abs(qconv->get_freezed_output_max()));
    // Output is signed int:
    //   s32 = f32 * 2^31 / max_abs32
    //   s8  = f32 * 2^7  / max_abs8
    //   =>  s8 = s32 * 2^-24 * max_abs32 / max_abs8
    return static_cast<float>(
        std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8));
}
}
}
}
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment