Commit 8bd3846f authored by Adam Straw, committed by Robert Kimball

graph builders for quantize scale (#1976)

* quantize scale passing unit tests

* epsilon bump

* finished with quantization scale

* unit tests passing with convolution scale as builder

* broadcasted constants and cleanup

* api consistency for quant builders

* code style

* cleanup

* newline at EOF

* use requantization_scale

* drop TF license as we are no longer using TF code directly
parent 2a26558a
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
namespace ngraph
{
namespace builder
{
/// \brief Creates a constant node of the requested element type holding a single value.
///
/// A scalar op::Constant is built first, with `num` converted (via static_cast) to the
/// C++ type that corresponds to `type`. If `shape` has at least one dimension, the
/// scalar is then wrapped in an op::Broadcast whose broadcast axes are all axes of
/// `shape`.
///
/// \param type  Element type of the constant; one of f32, f64, i8, i16, i32, i64,
///              u8, u16, u32, u64.
/// \param shape Requested output shape; an empty shape returns the scalar constant itself.
/// \param num   Value to store in the constant.
/// \return The scalar Constant node, or a Broadcast node of shape `shape` fed by it.
/// \throws ngraph_error when `type` is none of the element types listed above.
template <class T>
std::shared_ptr<Node>
    make_constant(const element::Type& type, const Shape& shape, const T& num)
{
    std::shared_ptr<Node> scalar;

    // Pick the host-side storage type that matches the requested element type.
    if (type == element::f32)
    {
        const std::vector<float> data{static_cast<float>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::f64)
    {
        const std::vector<double> data{static_cast<double>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::i64)
    {
        const std::vector<int64_t> data{static_cast<int64_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::i32)
    {
        const std::vector<int32_t> data{static_cast<int32_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::i16)
    {
        const std::vector<int16_t> data{static_cast<int16_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::i8)
    {
        const std::vector<int8_t> data{static_cast<int8_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::u64)
    {
        const std::vector<uint64_t> data{static_cast<uint64_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::u32)
    {
        const std::vector<uint32_t> data{static_cast<uint32_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::u16)
    {
        const std::vector<uint16_t> data{static_cast<uint16_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }
    else if (type == element::u8)
    {
        const std::vector<uint8_t> data{static_cast<uint8_t>(num)};
        scalar = std::make_shared<ngraph::op::Constant>(type, ngraph::Shape{}, data);
    }

    // No branch matched: the element type is not handled by this builder.
    if (scalar == nullptr)
    {
        throw ngraph_error("make_constant: Unsupported element type");
    }

    // A rank-0 request is satisfied by the scalar constant directly.
    if (shape.size() == 0)
    {
        return scalar;
    }

    // The input is a scalar, so every output axis is a broadcast axis.
    ngraph::AxisSet broadcast_axes;
    for (size_t axis = 0; axis < shape.size(); ++axis)
    {
        broadcast_axes.insert(axis);
    }
    return std::make_shared<ngraph::op::Broadcast>(scalar, shape, broadcast_axes);
}
}
}
This diff is collapsed.
......@@ -43,68 +43,67 @@ namespace ngraph
const ngraph::element::Type& type,
const ngraph::AxisSet& axes);
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
std::shared_ptr<Node> min,
std::shared_ptr<Node> max);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false);
std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node>
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output,
std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg,
std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
std::shared_ptr<Node> min,
std::shared_ptr<Node> max);
}
}
This diff is collapsed.
......@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
set_output_type(0,
element::i8,
util::infer_convolution_output_shape(this,
......
......@@ -41,7 +41,6 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_scale() const { return m_scale; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -51,7 +50,6 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_scale;
};
}
}
......@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......
......@@ -47,7 +47,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
......@@ -62,7 +61,6 @@ namespace ngraph
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
float m_scale;
};
}
}
......@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
set_output_type(0,
element::u8,
util::infer_convolution_output_shape(this,
......
......@@ -43,7 +43,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node>
......@@ -55,7 +54,6 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
float m_scale;
};
}
}
......@@ -30,6 +30,7 @@
#include "ngraph/op/negative.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/pattern/matcher.hpp"
......@@ -45,6 +46,7 @@
#include "ngraph/runtime/reference/negate.hpp"
#include "ngraph/runtime/reference/pad.hpp"
#include "ngraph/runtime/reference/quantize.hpp"
#include "ngraph/runtime/reference/relu.hpp"
#include "ngraph/runtime/reference/reshape.hpp"
#include "ngraph/runtime/reference/subtract.hpp"
......@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary()
bool is_supported_unary_op(std::shared_ptr<Node> n)
{
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n);
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n) ||
std::dynamic_pointer_cast<op::Relu>(n);
}
template <class T>
......@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant,
runtime::reference::negate<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else if (std::dynamic_pointer_cast<op::Relu>(unary))
{
runtime::reference::relu<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else
{
NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op";
......
......@@ -118,6 +118,7 @@
#include "ngraph/op/topk.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/common_function_collection.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
#include "ngraph/pass/dump_sorted.hpp"
......@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<ngraph::pass::ConstantFolding>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(
runtime::cpu::get_cse_handlers_map());
......
......@@ -25,6 +25,7 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
......@@ -226,6 +227,16 @@ namespace ngraph
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{
auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolution scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -234,12 +245,21 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolution*>(node))
->get_scale(),
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{
auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcr->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution(
data_desc,
weights_desc,
......@@ -248,12 +268,21 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node))
->get_scale(),
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{
auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcb->get_arguments()[3]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
// conv+bias = cvt_to_int8(scale*(dst + bias))
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution(
......@@ -265,8 +294,7 @@ namespace ngraph
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
->get_scale(),
scale_val[0],
ops);
}
else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment