Commit f30910c6 authored by Nishant Patel's avatar Nishant Patel Committed by Robert Kimball

Switch to scale and offset design from min and max for Quantization (#1789)

* Switch to scale and offset design from min and max for Quantization

* Remove offset and make the quantize ops single-output ops

* move cpu QuantOps to core and create builders

* rebase to HEAD

* remove convbias and convbiasrelu ctors which take conv

* remove mistakenly added quantize.rst

* remove offset

* Compute scale, move quantization ops to experimental dir and some PR feedback

* Normalize license headers
parent bcfbf099
...@@ -20,6 +20,7 @@ set (SRC ...@@ -20,6 +20,7 @@ set (SRC
autodiff/adjoints.cpp autodiff/adjoints.cpp
builder/autobroadcast.cpp builder/autobroadcast.cpp
builder/numpy_transpose.cpp builder/numpy_transpose.cpp
builder/quantization.cpp
builder/reduce_ops.cpp builder/reduce_ops.cpp
coordinate.cpp coordinate.cpp
coordinate_diff.cpp coordinate_diff.cpp
...@@ -84,6 +85,11 @@ set (SRC ...@@ -84,6 +85,11 @@ set (SRC
op/power.cpp op/power.cpp
op/product.cpp op/product.cpp
op/quantize.cpp op/quantize.cpp
op/experimental/quantized_avg_pool.cpp
op/experimental/quantized_conv_bias.cpp
op/experimental/quantized_conv_relu.cpp
op/experimental/quantized_conv.cpp
op/experimental/quantized_max_pool.cpp
op/reduce.cpp op/reduce.cpp
op/reduce_window.cpp op/reduce_window.cpp
op/relu.cpp op/relu.cpp
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
namespace ngraph
{
    namespace builder
    {
        /// \brief Builder for a quantized average-pool.
        ///
        /// \param arg  Quantized (i8/u8) input tensor.
        /// \param min, max  f32 Constant nodes carrying the real-valued range
        ///        represented by \p arg's quantized values.
        ///        NOTE(review): presumably the builder derives the op's scale
        ///        from these statistics — confirm against builder/quantization.cpp.
        std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
                                                     const Shape& window_shape,
                                                     const Strides& window_movement_strides,
                                                     const Shape& padding_below,
                                                     const Shape& padding_above,
                                                     bool include_padding_in_avg_computation,
                                                     const std::shared_ptr<Node> min,
                                                     const std::shared_ptr<Node> max);

        /// \brief Builder for a quantized convolution with fused bias (and
        ///        optional fused ReLU when \p with_relu is true).
        ///
        /// The six min/max arguments are f32 Constant nodes holding the real
        /// ranges of the input, the filter, and the (frozen) output; the
        /// builder turns them into a single requantization scale for the op.
        std::shared_ptr<Node>
            ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
                                           const std::shared_ptr<Node>& filters,
                                           const std::shared_ptr<Node>& bias,
                                           const Strides& window_movement_strides,
                                           const Strides& window_dilation_strides,
                                           const CoordinateDiff& padding_below,
                                           const CoordinateDiff& padding_above,
                                           const Strides& data_dilation_strides,
                                           const std::shared_ptr<Node> min_input,
                                           const std::shared_ptr<Node> max_input,
                                           const std::shared_ptr<Node> min_filter,
                                           const std::shared_ptr<Node> max_filter,
                                           const std::shared_ptr<Node> min_freezed_output,
                                           const std::shared_ptr<Node> max_freezed_output,
                                           const bool with_relu = false);

        /// \brief Builder for a quantized convolution with fused ReLU.
        /// Same min/max Constant-node contract as ScaledQuantizedConvolutionBias.
        std::shared_ptr<Node>
            ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
                                           const std::shared_ptr<Node>& filters,
                                           const Strides& window_movement_strides,
                                           const Strides& window_dilation_strides,
                                           const CoordinateDiff& padding_below,
                                           const CoordinateDiff& padding_above,
                                           const Strides& data_dilation_strides,
                                           const std::shared_ptr<Node> min_input,
                                           const std::shared_ptr<Node> max_input,
                                           const std::shared_ptr<Node> min_filter,
                                           const std::shared_ptr<Node> max_filter,
                                           const std::shared_ptr<Node> min_freezed_output,
                                           const std::shared_ptr<Node> max_freezed_output);

        /// \brief Builder for a plain quantized convolution.
        /// Same min/max Constant-node contract as ScaledQuantizedConvolutionBias.
        std::shared_ptr<Node>
            ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
                                       const std::shared_ptr<Node>& filters,
                                       const Strides& window_movement_strides,
                                       const Strides& window_dilation_strides,
                                       const CoordinateDiff& padding_below,
                                       const CoordinateDiff& padding_above,
                                       const Strides& data_dilation_strides,
                                       const std::shared_ptr<Node> min_input,
                                       const std::shared_ptr<Node> max_input,
                                       const std::shared_ptr<Node> min_filter,
                                       const std::shared_ptr<Node> max_filter,
                                       const std::shared_ptr<Node> min_freezed_output,
                                       const std::shared_ptr<Node> max_freezed_output);

        /// \brief Builder for a quantized max-pool.
        ///
        /// \param min, max  f32 Constant nodes with the real range of \p arg's
        ///        quantized values (max-pool itself is range-preserving).
        std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
                                                     const Shape& window_shape,
                                                     const Strides& window_movement_strides,
                                                     const Shape& padding_below,
                                                     const Shape& padding_above,
                                                     const std::shared_ptr<Node> min,
                                                     const std::shared_ptr<Node> max);
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#pragma once
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
    namespace builder
    {
        namespace quantization_util
        {
            /// \brief Compute the real-valued range [*min_c, *max_c] that the
            ///        product of two quantized tensors can represent.
            ///
            /// T1 and T2 are the quantized element types of the two factors and
            /// T3 is the quantized element type of the product.  min_a/max_a and
            /// min_b/max_b are the real ranges encoded by the two inputs.
            template <class T1, class T2, class T3>
            void quantization_range_for_multiplication(
                float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
            {
                // begin code copied and pasted (and modified) from
                // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
                //
                // Subtract in float: for wide quantized types (e.g. int32),
                // max() - min() overflows the integer type before the implicit
                // conversion to float happens.
                float a_one_quant_level =
                    (max_a - min_a) / (static_cast<float>(std::numeric_limits<T1>::max()) -
                                       static_cast<float>(std::numeric_limits<T1>::min()));
                float b_one_quant_level =
                    (max_b - min_b) / (static_cast<float>(std::numeric_limits<T2>::max()) -
                                       static_cast<float>(std::numeric_limits<T2>::min()));
                float c_one_quant_level = a_one_quant_level * b_one_quant_level;
                *min_c = c_one_quant_level * static_cast<float>(std::numeric_limits<T3>::min());
                *max_c = c_one_quant_level * static_cast<float>(std::numeric_limits<T3>::max());
                // end code copied and pasted (and modified) from
                // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
            }

            /// \brief Compute the requantization scale that maps the int32
            ///        accumulator of a u8 x i8 convolution onto the signed
            ///        8-bit output range described by the frozen output min/max.
            ///
            /// All six arguments must be Constant nodes holding scalar f32
            /// min/max statistics (they are static_pointer_cast without a
            /// runtime check, so passing anything else is undefined behavior).
            ///
            /// Declared `inline` because this is a definition in a header:
            /// without it, including the header from more than one translation
            /// unit produces duplicate-symbol / ODR violations.
            inline float get_scale(const std::shared_ptr<Node> min_input,
                                   const std::shared_ptr<Node> max_input,
                                   const std::shared_ptr<Node> min_filter,
                                   const std::shared_ptr<Node> max_filter,
                                   const std::shared_ptr<Node> min_freezed_output,
                                   const std::shared_ptr<Node> max_freezed_output)
            {
                auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input);
                auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input);
                auto min_filter_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(min_filter);
                auto max_filter_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(max_filter);
                auto min_freezed_output_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
                auto max_freezed_output_const_op =
                    std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);

                float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
                float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
                float filter_min =
                    *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
                float filter_max =
                    *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
                float output_min =
                    *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
                float output_max =
                    *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));

                // Range the u8 input x i8 filter product can occupy in the
                // int32 accumulator.
                float min_out_value;
                float max_out_value;
                quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(
                    input_min, input_max, filter_min, filter_max, &min_out_value, &max_out_value);

                const float max_abs32 = std::max(std::abs(min_out_value), std::abs(max_out_value));
                const float max_abs8 = std::max(std::abs(output_min), std::abs(output_max));

                // Output is signed int.
                // s32 = f32 * std::pow(2, 31)/ max_abs32;
                // s8 = f32 * std::pow(2, 7)/ max_abs8;
                // s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
                // (intermediate math in double to keep the tiny 2^-24 factor exact)
                const float scale = static_cast<float>(
                    (std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8)));
                return scale;
            }
        }
    }
}
...@@ -26,10 +26,8 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg, ...@@ -26,10 +26,8 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
bool include_padding_in_avg_computation, bool include_padding_in_avg_computation)
const shared_ptr<Node> min, : Op("QuantizedAvgPool", check_single_output_args({arg}))
const shared_ptr<Node> max)
: Op("QuantizedAvgPool", check_single_output_args({arg, min, max}))
, m_window_shape(window_shape) , m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
...@@ -40,18 +38,7 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg, ...@@ -40,18 +38,7 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8) if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
{ {
throw ngraph_error("Dequantization supported only for i8/u8!"); throw ngraph_error("QuantizedAvgPool supported only for i8/u8!");
}
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
} }
} }
...@@ -211,11 +198,7 @@ void op::QuantizedAvgPool::validate_and_infer_types() ...@@ -211,11 +198,7 @@ void op::QuantizedAvgPool::validate_and_infer_types()
result_shape[1] = channel_count; result_shape[1] = channel_count;
copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2); copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_output_size(3);
set_output_type(0, get_input_element_type(0), result_shape); set_output_type(0, get_input_element_type(0), result_shape);
//TODO(nbpatel): Change to Shape{} once the mkldnn version is updated
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
} }
shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_args) const shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_args) const
...@@ -226,7 +209,5 @@ shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_ ...@@ -226,7 +209,5 @@ shared_ptr<Node> op::QuantizedAvgPool::copy_with_new_args(const NodeVector& new_
m_window_movement_strides, m_window_movement_strides,
m_padding_below, m_padding_below,
m_padding_above, m_padding_above,
m_include_padding_in_avg_computation, m_include_padding_in_avg_computation);
new_args.at(1),
new_args.at(2));
} }
...@@ -48,9 +48,7 @@ namespace ngraph ...@@ -48,9 +48,7 @@ namespace ngraph
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
bool include_padding_in_avg_computation, bool include_padding_in_avg_computation);
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
void validate_and_infer_types() override; void validate_and_infer_types() override;
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
......
...@@ -30,21 +30,8 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -30,21 +30,8 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, const std::shared_ptr<Node> scale)
const std::shared_ptr<Node> max_input, : Op("QuantizedConvolution", check_single_output_args({data_batch, filters, scale}))
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
: Op("QuantizedConvolution",
check_single_output_args({data_batch,
filters,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides) , m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
...@@ -58,29 +45,11 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -58,29 +45,11 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input); auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input); float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter);
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
this->m_input_min = input_min; this->m_scale = scale_val;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
set_output_size(3);
set_output_type(0, set_output_type(0,
element::i8, element::i8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
...@@ -98,12 +67,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -98,12 +67,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
0, /* batch_axis_result, */ 0, /* batch_axis_result, */
1 /* output_channel_axis_result, */ 1 /* output_channel_axis_result, */
)); ));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
} }
shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector& new_args) const shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 8) if (new_args.size() != 3)
{ {
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
} }
...@@ -114,10 +81,5 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector& ...@@ -114,10 +81,5 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector&
get_padding_below(), get_padding_below(),
get_padding_above(), get_padding_above(),
get_data_dilation_strides(), get_data_dilation_strides(),
new_args.at(2), new_args.at(2)));
new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7)));
} }
...@@ -33,12 +33,7 @@ namespace ngraph ...@@ -33,12 +33,7 @@ namespace ngraph
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, const std::shared_ptr<Node> scale);
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; } const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; } const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
...@@ -46,12 +41,7 @@ namespace ngraph ...@@ -46,12 +41,7 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_input_min() const { return m_input_min; } float get_scale() const { return m_scale; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
...@@ -61,12 +51,7 @@ namespace ngraph ...@@ -61,12 +51,7 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_input_min; float m_scale;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
}; };
} }
} }
...@@ -16,84 +16,27 @@ ...@@ -16,84 +16,27 @@
#include <numeric> #include <numeric>
#include "conv_bias.hpp"
#include "quantized_conv_bias.hpp" #include "quantized_conv_bias.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
op::QuantizedConvolutionBias::QuantizedConvolutionBias( op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& data_batch,
const shared_ptr<op::QuantizedConvolution>& qconv, const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias, const shared_ptr<Node>& bias,
const bool with_relu) const Strides& window_movement_strides,
: Op("QuantizedConvolutionBias", const Strides& window_dilation_strides,
check_single_output_args({qconv->get_argument(0), const CoordinateDiff& padding_below,
qconv->get_argument(1), const CoordinateDiff& padding_above,
bias, const Strides& data_dilation_strides,
qconv->get_argument(2), const std::shared_ptr<Node> scale,
qconv->get_argument(3), const bool with_relu)
qconv->get_argument(4), : Op("QuantizedConvolutionBias", check_single_output_args({data_batch, filters, bias, scale}))
qconv->get_argument(5),
qconv->get_argument(6),
qconv->get_argument(7)}))
, m_window_movement_strides(qconv->get_window_movement_strides())
, m_window_dilation_strides(qconv->get_window_dilation_strides())
, m_padding_below(qconv->get_padding_below())
, m_padding_above(qconv->get_padding_above())
, m_data_dilation_strides(qconv->get_data_dilation_strides())
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
this->m_input_min = qconv->get_input_min();
this->m_input_max = qconv->get_input_max();
this->m_filter_min = qconv->get_filter_min();
this->m_filter_max = qconv->get_filter_max();
this->m_freezed_output_min = qconv->get_freezed_output_min();
this->m_freezed_output_max = qconv->get_freezed_output_max();
util::validate_convbias_shapes(qconv->get_argument(0)->get_shape(),
qconv->get_argument(1)->get_shape(),
bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8;
set_output_size(3);
set_output_type(0, output_et, qconv->get_shape());
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
op::QuantizedConvolutionBias::QuantizedConvolutionBias(
const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
const bool with_relu)
: Op("QuantizedConvolutionBias",
check_single_output_args({data_batch,
filters,
bias,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides) , m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
...@@ -106,31 +49,14 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias( ...@@ -106,31 +49,14 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input); auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input); float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter); this->m_scale = scale_val;
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
this->m_input_min = input_min;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape()); // TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8; auto output_et = with_relu ? element::u8 : element::i8;
set_output_size(3);
set_output_type(0, set_output_type(0,
output_et, output_et,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
...@@ -148,13 +74,11 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias( ...@@ -148,13 +74,11 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(
0, /* batch_axis_result, */ 0, /* batch_axis_result, */
1 /* output_channel_axis_result, */ 1 /* output_channel_axis_result, */
)); ));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
} }
shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVector& new_args) const shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 9) if (new_args.size() != 4)
{ {
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
} }
...@@ -168,10 +92,5 @@ shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVect ...@@ -168,10 +92,5 @@ shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVect
get_padding_above(), get_padding_above(),
get_data_dilation_strides(), get_data_dilation_strides(),
new_args.at(3), new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7),
new_args.at(8),
m_with_relu)); m_with_relu));
} }
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
#pragma once #pragma once
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/op.hpp" #include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -39,12 +39,7 @@ namespace ngraph ...@@ -39,12 +39,7 @@ namespace ngraph
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, const std::shared_ptr<Node> scale,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false); const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; } const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
...@@ -52,12 +47,7 @@ namespace ngraph ...@@ -52,12 +47,7 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_input_min() const { return m_input_min; } float get_scale() const { return m_scale; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
std::shared_ptr<Node> get_bias() { return get_argument(2); } std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
...@@ -72,12 +62,7 @@ namespace ngraph ...@@ -72,12 +62,7 @@ namespace ngraph
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
bool m_with_relu; bool m_with_relu;
float m_input_min; float m_scale;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
}; };
} }
} }
//***************************************************************************** /*******************************************************************************
// Copyright 2017-2018 Intel Corporation * Copyright 2017-2018 Intel Corporation
// *
// Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
// You may obtain a copy of the License at * You may obtain a copy of the License at
// *
// http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
// *
// Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
// limitations under the License. * limitations under the License.
//***************************************************************************** *******************************************************************************/
#include <numeric> #include <numeric>
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu( op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<op::QuantizedConvolution>& qconv) const std::shared_ptr<Node>& filters,
: Op("QuantizedConvolutionRelu", const Strides& window_movement_strides,
check_single_output_args({qconv->get_argument(0), const Strides& window_dilation_strides,
qconv->get_argument(1), const CoordinateDiff& padding_below,
qconv->get_argument(2), const CoordinateDiff& padding_above,
qconv->get_argument(3), const Strides& data_dilation_strides,
qconv->get_argument(4), const std::shared_ptr<Node> scale)
qconv->get_argument(5), : Op("QuantizedConvolutionRelu", check_single_output_args({data_batch, filters, scale}))
qconv->get_argument(6),
qconv->get_argument(7)}))
, m_window_movement_strides(qconv->get_window_movement_strides())
, m_window_dilation_strides(qconv->get_window_dilation_strides())
, m_padding_below(qconv->get_padding_below())
, m_padding_above(qconv->get_padding_above())
, m_data_dilation_strides(qconv->get_data_dilation_strides())
{
constructor_validate_and_infer_types();
this->m_input_min = qconv->get_input_min();
this->m_input_max = qconv->get_input_max();
this->m_filter_min = qconv->get_filter_min();
this->m_filter_max = qconv->get_filter_max();
this->m_freezed_output_min = qconv->get_freezed_output_min();
this->m_freezed_output_max = qconv->get_freezed_output_max();
set_output_size(3);
set_output_type(0, element::u8, qconv->get_shape());
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output)
: Op("QuantizedConvolutionRelu",
check_single_output_args({data_batch,
filters,
min_input,
max_input,
min_filter,
max_filter,
min_freezed_output,
max_freezed_output}))
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides) , m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
...@@ -90,28 +44,10 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu( ...@@ -90,28 +44,10 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto min_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_input); auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto max_input_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_input); float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
auto min_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(min_filter); this->m_scale = scale_val;
auto max_filter_const_op = std::static_pointer_cast<ngraph::op::Constant>(max_filter);
auto min_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(min_freezed_output);
auto max_freezed_output_const_op =
std::static_pointer_cast<ngraph::op::Constant>(max_freezed_output);
float input_min = *(static_cast<float const*>(min_input_const_op->get_data_ptr()));
float input_max = *(static_cast<float const*>(max_input_const_op->get_data_ptr()));
float filter_min = *(static_cast<float const*>(min_filter_const_op->get_data_ptr()));
float filter_max = *(static_cast<float const*>(max_filter_const_op->get_data_ptr()));
float output_min = *(static_cast<float const*>(min_freezed_output_const_op->get_data_ptr()));
float output_max = *(static_cast<float const*>(max_freezed_output_const_op->get_data_ptr()));
this->m_input_min = input_min;
this->m_input_max = input_max;
this->m_filter_min = filter_min;
this->m_filter_max = filter_max;
this->m_freezed_output_min = output_min;
this->m_freezed_output_max = output_max;
set_output_size(3);
set_output_type(0, set_output_type(0,
element::u8, element::u8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
...@@ -129,15 +65,12 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu( ...@@ -129,15 +65,12 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
0, /* batch_axis_result, */ 0, /* batch_axis_result, */
1 /* output_channel_axis_result, */ 1 /* output_channel_axis_result, */
)); ));
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
} }
std::shared_ptr<Node> std::shared_ptr<Node>
op::QuantizedConvolutionRelu::copy_with_new_args(const NodeVector& new_args) const op::QuantizedConvolutionRelu::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 8) if (new_args.size() != 3)
{ {
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
} }
...@@ -149,10 +82,5 @@ std::shared_ptr<Node> ...@@ -149,10 +82,5 @@ std::shared_ptr<Node>
get_padding_below(), get_padding_below(),
get_padding_above(), get_padding_above(),
get_data_dilation_strides(), get_data_dilation_strides(),
new_args.at(2), new_args.at(2)));
new_args.at(3),
new_args.at(4),
new_args.at(5),
new_args.at(6),
new_args.at(7)));
} }
...@@ -16,9 +16,8 @@ ...@@ -16,9 +16,8 @@
#pragma once #pragma once
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/op.hpp" #include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -37,24 +36,14 @@ namespace ngraph ...@@ -37,24 +36,14 @@ namespace ngraph
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, const std::shared_ptr<Node> scale);
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; } const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; } const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_input_min() const { return m_input_min; } float get_scale() const { return m_scale; }
float get_input_max() const { return m_input_max; }
float get_filter_min() const { return m_filter_min; }
float get_filter_max() const { return m_filter_max; }
float get_freezed_output_min() const { return m_freezed_output_min; }
float get_freezed_output_max() const { return m_freezed_output_max; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
...@@ -66,12 +55,7 @@ namespace ngraph ...@@ -66,12 +55,7 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_input_min; float m_scale;
float m_input_max;
float m_filter_min;
float m_filter_max;
float m_freezed_output_min;
float m_freezed_output_max;
}; };
} }
} }
...@@ -26,10 +26,8 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg, ...@@ -26,10 +26,8 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above)
const shared_ptr<Node> min, : Op("QuantizedMaxPool", check_single_output_args({arg}))
const shared_ptr<Node> max)
: Op("QuantizedMaxPool", check_single_output_args({arg, min, max}))
, m_window_shape(window_shape) , m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
...@@ -39,18 +37,7 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg, ...@@ -39,18 +37,7 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8) if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
{ {
throw ngraph_error("Dequantization supported only for i8/u8!"); throw ngraph_error("QuantizedMaxPool supported only for i8/u8!");
}
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
} }
} }
...@@ -171,11 +158,7 @@ void op::QuantizedMaxPool::validate_and_infer_types() ...@@ -171,11 +158,7 @@ void op::QuantizedMaxPool::validate_and_infer_types()
result_shape[1] = channel_count; result_shape[1] = channel_count;
copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2); copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_output_size(3);
set_output_type(0, get_input_element_type(0), result_shape); set_output_type(0, get_input_element_type(0), result_shape);
//TODO(nbpatel): Change to Shape{} once the mkldnn version is updated.
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
} }
shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_args) const shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_args) const
...@@ -185,7 +168,5 @@ shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_ ...@@ -185,7 +168,5 @@ shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_
m_window_shape, m_window_shape,
m_window_movement_strides, m_window_movement_strides,
m_padding_below, m_padding_below,
m_padding_above, m_padding_above);
new_args.at(1),
new_args.at(2));
} }
...@@ -37,9 +37,7 @@ namespace ngraph ...@@ -37,9 +37,7 @@ namespace ngraph
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above);
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
void validate_and_infer_types() override; void validate_and_infer_types() override;
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
......
...@@ -40,8 +40,6 @@ set(SRC ...@@ -40,8 +40,6 @@ set(SRC
builder/convert_layout.cpp builder/convert_layout.cpp
builder/quantized_conv.cpp builder/quantized_conv.cpp
builder/convolution.cpp builder/convolution.cpp
builder/dequantize.cpp
builder/quantize.cpp
builder/dot.cpp builder/dot.cpp
builder/function_call.cpp builder/function_call.cpp
builder/lstm.cpp builder/lstm.cpp
...@@ -85,18 +83,11 @@ set(SRC ...@@ -85,18 +83,11 @@ set(SRC
op/group_conv.cpp op/group_conv.cpp
op/conv_bias.cpp op/conv_bias.cpp
op/conv_relu.cpp op/conv_relu.cpp
op/quantized_conv.cpp
op/convert_layout.cpp op/convert_layout.cpp
op/dequantize.cpp
op/quantize.cpp
op/loop_kernel.cpp op/loop_kernel.cpp
op/lstm.cpp op/lstm.cpp
op/matmul_bias.cpp op/matmul_bias.cpp
op/max_pool_with_indices.cpp op/max_pool_with_indices.cpp
op/quantized_max_pool.cpp
op/quantized_avg_pool.cpp
op/quantized_conv_relu.cpp
op/quantized_conv_bias.cpp
op/rnn.cpp op/rnn.cpp
op/sigmoid_mul.cpp op/sigmoid_mul.cpp
op/conv_add.cpp op/conv_add.cpp
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include <vector>
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // DEX builder for the CPU dequantization op: emits a single MKLDNN
            // primitive that converts the quantized input tensor (args[0]) into
            // the float output tensor (out[0]).
            template <>
            void Builder::BUILDER_DECL(ngraph::op::DequantizeCPU)
            {
                // Only an MKLDNN-backed kernel exists for this op in DEX mode.
                if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    throw ngraph_error("unsupported parameters for DequantizeCPUOp via DEX");
                }

                auto& functors = external_function->get_functors();
                auto& src_tensor = external_function->get_tensor_data(args[0].get_name());
                auto& dst_tensor = external_function->get_tensor_data(out[0].get_name());

                auto& emitter = external_function->get_mkldnn_emitter();
                auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                auto dst_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

                // Build the dequantization primitive once at compile time; the
                // functor only rebinds the runtime memory pointers and invokes it.
                const size_t prim_index =
                    emitter->build_dequantization(node, src_desc, dst_desc);
                auto& deps = emitter->get_primitive_deps(prim_index);

                functors.emplace_back(
                    [&src_tensor, &dst_tensor, &deps, prim_index](CPURuntimeContext* ctx) {
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_tensor);
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], dst_tensor);
                        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, prim_index);
                    });
            }
            REGISTER_OP_BUILDER(DequantizeCPU);
        }
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <vector>
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/quantization_util.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // DEX builder for the CPU quantization op: derives the effective
            // range and scale from the op's input min/max attributes, then emits
            // an MKLDNN reorder that quantizes the float input (args[0]) into
            // out[0].  Outputs 1 and 2 receive the computed min/max range.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::QuantizeCPU)
            {
                // Only an MKLDNN-backed kernel exists for this op in DEX mode.
                if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    throw ngraph_error("Unsupported parameters for QuantizeCPUOp via DEX");
                }

                auto quantize = static_cast<const ngraph::op::QuantizeCPU*>(node);
                auto& functors = external_function->get_functors();

                auto& src_tensor = external_function->get_tensor_data(args[0].get_name());
                auto& dst_tensor = external_function->get_tensor_data(out[0].get_name());
                auto& min_tensor = external_function->get_tensor_data(out[1].get_name());
                auto& max_tensor = external_function->get_tensor_data(out[2].get_name());

                auto& emitter = external_function->get_mkldnn_emitter();
                auto src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                auto dst_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

                // quant_util is filled as { min_range, max_range, scale }.
                vector<float> quant_util;
                quantization_util::get_min_max_range(quantize->get_input_min(),
                                                     quantize->get_input_max(),
                                                     (quantize->get_quantize_et()).is_signed(),
                                                     quant_util);

                const std::vector<float> scales{quant_util[2]};
                const size_t prim_index =
                    emitter->build_quantize_reorder(src_desc, dst_desc, scales);
                auto& deps = emitter->get_primitive_deps(prim_index);

                // quant_util is captured by value: the local vector is gone by
                // the time the functor executes.
                functors.emplace_back([&, prim_index, quant_util](CPURuntimeContext* ctx) {
                    cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], src_tensor);
                    cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], dst_tensor);
                    *(static_cast<float*>(min_tensor)) = quant_util[0];
                    *(static_cast<float*>(max_tensor)) = quant_util[1];
                    cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, prim_index);
                });
            }
            REGISTER_OP_BUILDER(QuantizeCPU);
        }
    }
}
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp" #include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp" #include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
...@@ -37,20 +37,15 @@ namespace ngraph ...@@ -37,20 +37,15 @@ namespace ngraph
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util; size_t qavg_pool_index = mkldnn_emitter->build_quantized_avg_pool(node);
mkldnn_emitter->build_quantized_avg_pool(node, quant_util); auto& deps = mkldnn_emitter->get_primitive_deps(qavg_pool_index);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
auto functor = [&, quant_util](CPURuntimeContext* ctx) { auto functor = [&, qavg_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0]; cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qavg_pool_index);
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
...@@ -14,13 +14,13 @@ ...@@ -14,13 +14,13 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_conv.hpp" #include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp" #include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -36,13 +36,10 @@ namespace ngraph ...@@ -36,13 +36,10 @@ namespace ngraph
{ {
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto qconvolution = static_cast<const ngraph::op::QuantizedConvolution*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
...@@ -50,16 +47,11 @@ namespace ngraph ...@@ -50,16 +47,11 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolution>( mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolution>(
node, args, out); node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution->get_freezed_output_min();
float max_freezed_output = qconvolution->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output]( auto functor = [&, conv_index](CPURuntimeContext* ctx) {
CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
...@@ -75,14 +67,10 @@ namespace ngraph ...@@ -75,14 +67,10 @@ namespace ngraph
{ {
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto qconvolution_relu =
static_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
...@@ -90,16 +78,11 @@ namespace ngraph ...@@ -90,16 +78,11 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>( mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>(
node, args, out); node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution_relu->get_freezed_output_min();
float max_freezed_output = qconvolution_relu->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output]( auto functor = [&, conv_index](CPURuntimeContext* ctx) {
CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
...@@ -116,15 +99,11 @@ namespace ngraph ...@@ -116,15 +99,11 @@ namespace ngraph
{ {
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto qconvolution_bias =
static_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name()); auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name()); auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
...@@ -132,17 +111,12 @@ namespace ngraph ...@@ -132,17 +111,12 @@ namespace ngraph
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBias>( mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBias>(
node, args, out); node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
float min_freezed_output = qconvolution_bias->get_freezed_output_min();
float max_freezed_output = qconvolution_bias->get_freezed_output_max();
auto functor = [&, conv_index, min_freezed_output, max_freezed_output]( auto functor = [&, conv_index](CPURuntimeContext* ctx) {
CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out0_tensor);
*(static_cast<float*>(out1_tensor)) = min_freezed_output;
*(static_cast<float*>(out2_tensor)) = max_freezed_output;
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp" #include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp" #include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
...@@ -37,21 +37,16 @@ namespace ngraph ...@@ -37,21 +37,16 @@ namespace ngraph
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name()); auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name()); auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util; size_t qmax_pool_index = mkldnn_emitter->build_quantized_max_pool(node);
mkldnn_emitter->build_quantized_max_pool(node, quant_util); auto& deps = mkldnn_emitter->get_primitive_deps(qmax_pool_index);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
auto functor = [&, quant_util](CPURuntimeContext* ctx) { auto functor = [&, qmax_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0]; cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, qmax_pool_index);
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
} }
......
This diff is collapsed.
...@@ -70,6 +70,11 @@ ...@@ -70,6 +70,11 @@
#include "ngraph/op/dot.hpp" #include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp" #include "ngraph/op/equal.hpp"
#include "ngraph/op/exp.hpp" #include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/floor.hpp" #include "ngraph/op/floor.hpp"
#include "ngraph/op/function_call.hpp" #include "ngraph/op/function_call.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
...@@ -146,18 +151,11 @@ ...@@ -146,18 +151,11 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp" #include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp" #include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp" #include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp" #include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp" #include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp" #include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp" #include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp" #include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
...@@ -304,8 +302,6 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -304,8 +302,6 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::emit<op::Ceiling>}, {TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::emit<op::Ceiling>},
{TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::emit<op::Sqrt>}, {TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::emit<op::Sqrt>},
{TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::emit<op::Convolution>}, {TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::emit<op::Convolution>},
{TI(ngraph::op::QuantizeCPU), &runtime::cpu::CPU_Emitter::emit<op::QuantizeCPU>},
{TI(ngraph::op::DequantizeCPU), &runtime::cpu::CPU_Emitter::emit<op::DequantizeCPU>},
{TI(ngraph::op::ConvolutionBackpropFilters), {TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>}, &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
......
...@@ -20,13 +20,12 @@ ...@@ -20,13 +20,12 @@
#include "mkldnn_emitter.hpp" #include "mkldnn_emitter.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type.hpp"
using namespace ngraph::runtime::cpu; using namespace ngraph::runtime::cpu;
...@@ -123,31 +122,7 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc) ...@@ -123,31 +122,7 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
return index; return index;
} }
size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node, size_t MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node)
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
{
auto dequantize = static_cast<const ngraph::op::DequantizeCPU*>(node);
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(1));
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(2));
float min_range = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max_range = *(static_cast<float const*>(max_const_op->get_data_ptr()));
const float max_abs = std::max(std::abs(min_range), std::abs(max_range));
bool is_signed = (dequantize->get_dequantize_et()).is_signed();
const float target_range =
static_cast<float>((is_signed ? std::pow(2, 7) : std::pow(2, 8)) - 1);
const float scale_factor = max_abs / target_range;
std::vector<float> scales;
scales.push_back(scale_factor);
size_t dequantize_index = 0;
dequantize_index = this->build_quantize_reorder(input_desc, result_desc, scales);
return dequantize_index;
}
void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util)
{ {
auto qmax_pool = static_cast<const ngraph::op::QuantizedMaxPool*>(node); auto qmax_pool = static_cast<const ngraph::op::QuantizedMaxPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
...@@ -159,17 +134,10 @@ void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node, ...@@ -159,17 +134,10 @@ void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node,
qmax_pool->get_window_shape(), qmax_pool->get_window_shape(),
qmax_pool->get_padding_below(), qmax_pool->get_padding_below(),
qmax_pool->get_padding_above()); qmax_pool->get_padding_above());
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(1)); return qmax_pool_index;
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qmax_pool_index);
} }
void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node, size_t MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node)
std::vector<float>& quant_util)
{ {
auto qavg_pool = static_cast<const ngraph::op::QuantizedAvgPool*>(node); auto qavg_pool = static_cast<const ngraph::op::QuantizedAvgPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0); auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
...@@ -184,13 +152,7 @@ void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node, ...@@ -184,13 +152,7 @@ void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node,
qavg_pool->get_window_shape(), qavg_pool->get_window_shape(),
qavg_pool->get_padding_below(), qavg_pool->get_padding_below(),
qavg_pool->get_padding_above()); qavg_pool->get_padding_above());
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(1)); return qavg_pool_index;
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qavg_pool_index);
} }
mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format( mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format(
...@@ -787,28 +749,6 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc, ...@@ -787,28 +749,6 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales)
{
size_t input_index = build_memory_primitive(input_desc);
size_t result_index = build_memory_primitive(result_desc);
mkldnn::primitive_attr attr;
attr.set_output_scales(0, scales);
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
auto reorder_desc =
mkldnn::reorder::primitive_desc({input_desc, mkldnn_utils::global_cpu_engine},
{result_desc, mkldnn_utils::global_cpu_engine},
attr);
size_t primitive_index = insert_primitive(new mkldnn::reorder(
reorder_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc, size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha, float alpha,
......
...@@ -26,16 +26,15 @@ ...@@ -26,16 +26,15 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/bounded_relu.hpp" #include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_add.hpp" #include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/quantization_util.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
#include "ngraph/strides.hpp" #include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type.hpp"
...@@ -227,8 +226,6 @@ namespace ngraph ...@@ -227,8 +226,6 @@ namespace ngraph
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{ {
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolution>(node);
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -237,14 +234,12 @@ namespace ngraph ...@@ -237,14 +234,12 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
scale, (dynamic_cast<const ngraph::op::QuantizedConvolution*>(node))
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{ {
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolutionRelu>(
node);
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -253,15 +248,13 @@ namespace ngraph ...@@ -253,15 +248,13 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
scale, (dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node))
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{ {
// conv+bias = cvt_to_int8(scale*(dst + bias)) // conv+bias = cvt_to_int8(scale*(dst + bias))
const float scale =
quantization_util::get_scale<ngraph::op::QuantizedConvolutionBias>(
node);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
...@@ -272,7 +265,8 @@ namespace ngraph ...@@ -272,7 +265,8 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
scale, (dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
->get_scale(),
ops); ops);
} }
else else
...@@ -595,19 +589,9 @@ namespace ngraph ...@@ -595,19 +589,9 @@ namespace ngraph
const mkldnn::memory::desc& result_desc, const mkldnn::memory::desc& result_desc,
float alpha); float alpha);
size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc, size_t build_quantized_max_pool(const ngraph::Node* node);
const mkldnn::memory::desc& result_desc,
const std::vector<float>& scales);
size_t build_dequantization(const ngraph::Node* node, size_t build_quantized_avg_pool(const ngraph::Node* node);
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
void build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
void build_quantized_avg_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
private: private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives; std::vector<mkldnn::primitive*> m_mkldnn_primitives;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/op/constant.hpp"
/// Constructs a DequantizeCPU op: converts an i8/u8 quantized tensor back to
/// f32 using the [min, max] range supplied as two scalar Constant inputs.
///
/// \param input quantized tensor; element type must be u8 or i8
/// \param min   scalar Constant holding the lower bound of the real range
/// \param max   scalar Constant holding the upper bound of the real range
/// \param type  quantized element type recorded for later scale computation
///              (retrievable via get_dequantize_et())
///
/// Throws ngraph_error if any of the input contracts above is violated.
ngraph::op::DequantizeCPU::DequantizeCPU(std::shared_ptr<Node> input,
                                         std::shared_ptr<Node> min,
                                         std::shared_ptr<Node> max,
                                         const element::Type& type)
    : Op("DequantizeCPU", check_single_output_args({input, min, max}))
    , m_element_type(type)
{
    constructor_validate_and_infer_types();
    if (input->get_element_type() != element::u8 && input->get_element_type() != element::i8)
    {
        throw ngraph_error("Dequantization supported only for i8/u8!");
    }
    // Bug fix: the original compared min's element type against itself
    // (min != min), which is always false, so a min/max element-type
    // mismatch was silently accepted. Compare min against max instead.
    if (min->get_element_type() != max->get_element_type())
    {
        throw ngraph_error("Min's element type isn't equal to max's!");
    }
    if (min->get_shape().size() != 0)
    {
        throw ngraph_error("Min is not a scalar!");
    }
    if (max->get_shape().size() != 0)
    {
        throw ngraph_error("Max is not a scalar!");
    }
    if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
          std::dynamic_pointer_cast<op::Constant>(max)))
    {
        throw ngraph_error("Min and max have to be constants!");
    }
    // Dequantized output is always f32 with the same shape as the input.
    set_output_type(0, element::f32, input->get_shape());
}
/// Clones this op over a new set of inputs (data, min, max), preserving the
/// recorded quantized element type. Exactly three arguments are required.
std::shared_ptr<ngraph::Node>
    ngraph::op::DequantizeCPU::copy_with_new_args(const NodeVector& new_args) const
{
    if (new_args.size() == 3)
    {
        return std::make_shared<DequantizeCPU>(
            new_args.at(0), new_args.at(1), new_args.at(2), m_element_type);
    }
    throw ngraph_error("Incorrect number of new arguments");
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
    namespace op
    {
        /// \brief CPU-backend op that dequantizes an i8/u8 tensor to f32.
        ///
        /// Inputs: (data, min, max) where min/max are scalar Constants giving
        /// the real-valued range of the quantized data. The single output is
        /// f32 with the input's shape (see the constructor definition).
        class DequantizeCPU : public Op
        {
        public:
            /// \param input quantized tensor (u8 or i8)
            /// \param min   scalar Constant: lower bound of the real range
            /// \param max   scalar Constant: upper bound of the real range
            /// \param type  quantized element type used for scale computation
            DequantizeCPU(std::shared_ptr<Node> input,
                          std::shared_ptr<Node> min,
                          std::shared_ptr<Node> max,
                          const element::Type& type);
            /// Element type the data was quantized in (drives signed/unsigned scaling).
            const element::Type& get_dequantize_et() const { return m_element_type; }
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;
        private:
            // Immutable after construction; copied verbatim by copy_with_new_args.
            const element::Type m_element_type;
        };
    }
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/op/constant.hpp"
/// Constructs a QuantizeCPU op: quantizes an f32 tensor to i8/u8 using the
/// [min, max] range supplied as two scalar Constant inputs.
///
/// \param input f32 tensor to quantize
/// \param min   scalar Constant holding the requested lower range bound
/// \param max   scalar Constant holding the requested upper range bound
/// \param type  target quantized element type (i8 or u8)
///
/// Produces three outputs: the quantized tensor (output 0, element type
/// `type`, input's shape) and the scalar f32 min/max actually used
/// (outputs 1 and 2). Throws ngraph_error on any contract violation.
ngraph::op::QuantizeCPU::QuantizeCPU(std::shared_ptr<Node> input,
                                     std::shared_ptr<Node> min,
                                     std::shared_ptr<Node> max,
                                     const element::Type& type)
    : Op("QuantizeCPU", check_single_output_args({input, min, max}))
    , m_element_type(type)
{
    constructor_validate_and_infer_types();
    if (input->get_element_type() != element::f32)
    {
        throw ngraph_error("Quantization supported only from float32 --> i8/u8!");
    }
    // Bug fix: the original compared min's element type against itself
    // (min != min), which is always false, so a min/max element-type
    // mismatch was silently accepted. Compare min against max instead.
    if (min->get_element_type() != max->get_element_type())
    {
        throw ngraph_error("Min's element type isn't equal to max's!");
    }
    if (min->get_shape().size() != 0)
    {
        throw ngraph_error("Min is not a scalar!");
    }
    if (max->get_shape().size() != 0)
    {
        throw ngraph_error("Max is not a scalar!");
    }
    if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
          std::dynamic_pointer_cast<op::Constant>(max)))
    {
        throw ngraph_error("Min and max have to be constants!");
    }
    // Cache the scalar range values so the emitter can compute scales
    // without re-reading the Constant nodes.
    auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(min);
    auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(max);
    float input_min_range = *(static_cast<float const*>(min_const_op->get_data_ptr()));
    float input_max_range = *(static_cast<float const*>(max_const_op->get_data_ptr()));
    this->m_input_min = input_min_range;
    this->m_input_max = input_max_range;
    // Output 0: quantized data; outputs 1/2: scalar f32 min/max of the range.
    set_output_size(3);
    set_output_type(0, type, input->get_shape());
    set_output_type(1, element::f32, Shape{});
    set_output_type(2, element::f32, Shape{});
}
/// Clones this op over a new set of inputs (data, min, max), preserving the
/// recorded target element type. Exactly three arguments are required.
std::shared_ptr<ngraph::Node>
    ngraph::op::QuantizeCPU::copy_with_new_args(const NodeVector& new_args) const
{
    if (new_args.size() == 3)
    {
        return std::make_shared<QuantizeCPU>(
            new_args.at(0), new_args.at(1), new_args.at(2), m_element_type);
    }
    throw ngraph_error("Incorrect number of new arguments");
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
    namespace op
    {
        /// \brief CPU-backend op that quantizes an f32 tensor to i8/u8.
        ///
        /// Inputs: (data, min, max) where min/max are scalar Constants giving
        /// the requested real-valued range. Produces three outputs: the
        /// quantized tensor plus the scalar f32 min/max actually used
        /// (see the constructor definition).
        class QuantizeCPU : public Op
        {
        public:
            /// \param input f32 tensor to quantize
            /// \param min   scalar Constant: lower bound of the range
            /// \param max   scalar Constant: upper bound of the range
            /// \param type  target quantized element type (i8 or u8)
            QuantizeCPU(std::shared_ptr<Node> input,
                        std::shared_ptr<Node> min,
                        std::shared_ptr<Node> max,
                        const element::Type& type);
            /// Target quantized element type (drives signed/unsigned scaling).
            const element::Type& get_quantize_et() const { return m_element_type; }
            /// Lower range bound cached from the min Constant at construction.
            float get_input_min() const { return m_input_min; }
            /// Upper range bound cached from the max Constant at construction.
            float get_input_max() const { return m_input_max; }
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;
        private:
            const element::Type m_element_type;
            // Scalar range values read out of the Constant inputs in the ctor.
            float m_input_min;
            float m_input_max;
        };
    }
}
...@@ -30,6 +30,11 @@ ...@@ -30,6 +30,11 @@
#include "ngraph/op/batch_norm.hpp" #include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp" #include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/lrn.hpp" #include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp" #include "ngraph/op/max_pool.hpp"
#include "ngraph/op/relu.hpp" #include "ngraph/op/relu.hpp"
...@@ -43,16 +48,9 @@ ...@@ -43,16 +48,9 @@
#include "ngraph/runtime/cpu/op/conv_add.hpp" #include "ngraph/runtime/cpu/op/conv_add.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp" #include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp" #include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp" #include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp" #include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp" #include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp" #include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp" #include "ngraph/runtime/cpu/op/sigmoid.hpp"
...@@ -733,27 +731,14 @@ namespace ngraph ...@@ -733,27 +731,14 @@ namespace ngraph
} }
} }
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::DequantizeCPU)
{
if (node->get_input_element_type(0) == element::u8 ||
node->get_input_element_type(0) == element::i8)
{
auto dequantize = static_cast<op::DequantizeCPU*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
dequantize->set_op_annotations(op_annotations);
}
}
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolution) void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolution)
{ {
auto quantized_conv = static_cast<op::QuantizedConvolution*>(node);
if (node->get_input_element_type(0) == element::u8 && if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8) node->get_input_element_type(1) == element::i8)
{ {
auto quantized_conv = static_cast<op::QuantizedConvolution*>(node);
auto op_annotations = auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>(); std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true); op_annotations->set_mkldnn_op(true);
...@@ -764,10 +749,11 @@ namespace ngraph ...@@ -764,10 +749,11 @@ namespace ngraph
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionRelu) void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionRelu)
{ {
auto quantized_conv_relu = static_cast<op::QuantizedConvolutionRelu*>(node);
if (node->get_input_element_type(0) == element::u8 && if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8) node->get_input_element_type(1) == element::i8)
{ {
auto quantized_conv_relu = static_cast<op::QuantizedConvolutionRelu*>(node);
auto op_annotations = auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>(); std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true); op_annotations->set_mkldnn_op(true);
...@@ -775,26 +761,14 @@ namespace ngraph ...@@ -775,26 +761,14 @@ namespace ngraph
} }
} }
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizeCPU)
{
if (node->get_input_element_type(0) == element::f32)
{
auto quantize = static_cast<op::QuantizeCPU*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
quantize->set_op_annotations(op_annotations);
}
}
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBias) void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBias)
{ {
auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBias*>(node);
if (node->get_input_element_type(0) == element::u8 && if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8) node->get_input_element_type(1) == element::i8)
{ {
auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBias*>(node);
auto op_annotations = auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>(); std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true); op_annotations->set_mkldnn_op(true);
...@@ -861,14 +835,10 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{ ...@@ -861,14 +835,10 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedAvgPool>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedAvgPool>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>}, {TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>},
{TI(ngraph::op::Slice), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Slice>}, {TI(ngraph::op::Slice), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Slice>},
{TI(ngraph::op::QuantizeCPU),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizeCPU>},
{TI(ngraph::op::ReplaceSlice), {TI(ngraph::op::ReplaceSlice),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReplaceSlice>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::ConvolutionAdd), {TI(ngraph::op::ConvolutionAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionAdd>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionAdd>},
{TI(ngraph::op::DequantizeCPU),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::DequantizeCPU>},
{TI(ngraph::op::QuantizedConvolutionRelu), {TI(ngraph::op::QuantizedConvolutionRelu),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionRelu>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias), {TI(ngraph::op::QuantizedConvolutionBias),
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#pragma once
#include <limits>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Helpers for computing quantization ranges and scales for the CPU
            // backend. Portions are adapted from TensorFlow (see inline
            // attribution comments); the exact floating-point expression order
            // is preserved from the original sources.
            namespace quantization_util
            {
                // Given the real-valued ranges of two quantized operands
                // (element types T1 and T2) being multiplied, compute the
                // real-valued range [*min_c, *max_c] representable by the
                // accumulator type T3. Derived from one-quantum step sizes of
                // each operand's type.
                template <class T1, class T2, class T3>
                void quantization_range_for_multiplication(
                    float min_a, float max_a, float min_b, float max_b, float* min_c, float* max_c)
                {
                    // begin code copied and pasted (and modified) from
                    // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
                    float a_one_quant_level = (max_a - min_a) / (std::numeric_limits<T1>::max() -
                                                                 std::numeric_limits<T1>::min());
                    float b_one_quant_level = (max_b - min_b) / (std::numeric_limits<T2>::max() -
                                                                 std::numeric_limits<T2>::min());
                    float c_one_quant_level = a_one_quant_level * b_one_quant_level;
                    *min_c = c_one_quant_level * std::numeric_limits<T3>::min();
                    *max_c = c_one_quant_level * std::numeric_limits<T3>::max();
                    // end code copied and pasted (and modified) from
                    // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantization_utils.h
                }

                // Normalizes a user-requested [input_min_range, input_max_range]
                // into a symmetric (signed) or zero-based (unsigned) range and a
                // scale, appending {min_range, max_range, scale} to quant_util.
                // is_signed selects an i8 (target 2^7-1) vs u8 (target 2^8-1)
                // quantization target.
                static inline void get_min_max_range(float input_min_range,
                                                     float input_max_range,
                                                     bool is_signed,
                                                     std::vector<float>& quant_util)
                {
                    // begin code copied and pasted from
                    // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantize_op.cc
                    float min_range;
                    float max_range;
                    // If input_min_range and input_max_range are close,
                    // introduce a slightly larger delta between them.
                    min_range = std::min(0.0f, input_min_range);
                    const float epsilon =
                        std::max(1.0f, std::max(fabsf(input_min_range), fabsf(input_max_range))) /
                        100.0f;
                    max_range = std::max(input_max_range, min_range + epsilon);
                    max_range = std::max(0.0f, max_range);
                    // end code copied and pasted from
                    // github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/quantize_op.cc
                    // Re-center on the largest magnitude so the range is
                    // symmetric for signed types, [0, max_abs] for unsigned.
                    const float max_abs = std::max(std::abs(min_range), std::abs(max_range));
                    const float target_range =
                        static_cast<float>((is_signed ? std::pow(2, 7) : std::pow(2, 8)) - 1);
                    max_range = max_abs;
                    min_range = is_signed ? -max_abs : 0;
                    const float scale = target_range / max_abs;
                    quant_util.push_back(min_range);
                    quant_util.push_back(max_range);
                    quant_util.push_back(scale);
                }

                // Computes the requantization scale for a quantized convolution
                // node: the factor converting the s32 accumulator result back
                // to the node's frozen s8 output range. OP must expose
                // get_input_min/max, get_filter_min/max and
                // get_freezed_output_min/max (e.g. the QuantizedConvolution*
                // ops in this backend).
                template <typename OP>
                float get_scale(const ngraph::Node* node)
                {
                    auto qconvolution = static_cast<const OP*>(node);
                    float min_out_value;
                    float max_out_value;
                    // Range of the s32 accumulator implied by the u8 data and
                    // i8 filter input ranges.
                    quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(
                        qconvolution->get_input_min(),
                        qconvolution->get_input_max(),
                        qconvolution->get_filter_min(),
                        qconvolution->get_filter_max(),
                        &min_out_value,
                        &max_out_value);
                    const float max_abs32 =
                        std::max(std::abs(min_out_value), std::abs(max_out_value));
                    const float max_abs8 =
                        std::max(std::abs(qconvolution->get_freezed_output_min()),
                                 std::abs(qconvolution->get_freezed_output_max()));
                    // Output is signed int.
                    // s32 = f32 * std::pow(2, 31)/ max_abs32;
                    // s8 = f32 * std::pow(2, 7)/ max_abs8;
                    // s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
                    const float scale = static_cast<float>(
                        (std::pow(2, -24) * static_cast<double>(max_abs32 / max_abs8)));
                    return scale;
                }
            }
        }
    }
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment