Commit 8bd3846f authored by Adam Straw, committed by Robert Kimball

graph builders for quantize scale (#1976)

* quantize scale passing unit tests

* epsilon bump

* finished with quantization scale

* unit tests passing with convolution scale as builder

* broadcasted constants and cleanup

* api consistency for quant builders

* code style

* cleanup

* newline at EOF

* use requantization_scale

* drop TF license as we are no longer using TF code directly
parent 2a26558a
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
namespace ngraph
{
    namespace builder
    {
        /// \brief Creates a constant node holding a single value, optionally
        ///        broadcast to a requested shape.
        ///
        /// The value \p num is converted with static_cast to the C++ type that
        /// corresponds to \p type and stored in a scalar (rank-0) Constant. When
        /// \p shape has at least one dimension, that scalar is wrapped in a
        /// Broadcast whose broadcast axes are all axes of \p shape.
        ///
        /// \param type  Element type of the constant. Handled types: f32, f64,
        ///              i64, i32, i16, i8, u64, u32, u16, u8.
        /// \param shape Shape of the returned node; an empty shape returns the
        ///              scalar Constant itself.
        /// \param num   Value to store.
        /// \return The scalar Constant, or a Broadcast of it to \p shape.
        /// \throws ngraph_error if \p type is not one of the handled types.
        template <class T>
        std::shared_ptr<Node>
            make_constant(const element::Type& type, const Shape& shape, const T& num)
        {
            // Every branch below builds the same rank-0 constant; only the
            // storage type of the single element differs.
            const ngraph::Shape scalar_shape{};
            std::shared_ptr<Node> scalar;

            if (type == element::f32)
            {
                const std::vector<float> payload{static_cast<float>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::f64)
            {
                const std::vector<double> payload{static_cast<double>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::i64)
            {
                const std::vector<int64_t> payload{static_cast<int64_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::i32)
            {
                const std::vector<int32_t> payload{static_cast<int32_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::i16)
            {
                const std::vector<int16_t> payload{static_cast<int16_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::i8)
            {
                const std::vector<int8_t> payload{static_cast<int8_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::u64)
            {
                const std::vector<uint64_t> payload{static_cast<uint64_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::u32)
            {
                const std::vector<uint32_t> payload{static_cast<uint32_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::u16)
            {
                const std::vector<uint16_t> payload{static_cast<uint16_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else if (type == element::u8)
            {
                const std::vector<uint8_t> payload{static_cast<uint8_t>(num)};
                scalar = std::make_shared<ngraph::op::Constant>(type, scalar_shape, payload);
            }
            else
            {
                throw ngraph_error("make_constant: Unsupported element type");
            }

            // A rank-0 request needs no broadcast: hand back the scalar as is.
            if (shape.size() == 0)
            {
                return scalar;
            }

            // The input is a scalar, so every output axis is a broadcast axis.
            ngraph::AxisSet broadcast_axes;
            for (size_t axis = 0; axis < shape.size(); ++axis)
            {
                broadcast_axes.insert(axis);
            }
            return std::make_shared<ngraph::op::Broadcast>(scalar, shape, broadcast_axes);
        }
    }
}
This diff is collapsed.
...@@ -43,68 +43,67 @@ namespace ngraph ...@@ -43,68 +43,67 @@ namespace ngraph
const ngraph::element::Type& type, const ngraph::element::Type& type,
const ngraph::AxisSet& axes); const ngraph::AxisSet& axes);
std::shared_ptr<Node> ScaledQuantizedAvgPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedAvgPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
bool include_padding_in_avg_computation, bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max); std::shared_ptr<Node> max);
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionBias(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& bias, std::shared_ptr<Node> bias,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output, std::shared_ptr<Node> max_freezed_output,
const bool with_relu = false); const bool with_relu = false);
std::shared_ptr<Node> std::shared_ptr<Node>
ScaledQuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch, ScaledQuantizedConvolutionRelu(std::shared_ptr<Node> input,
const std::shared_ptr<Node>& filters, std::shared_ptr<Node> filters,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Strides& window_dilation_strides, const Strides& window_dilation_strides,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> max_freezed_output); std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> std::shared_ptr<Node> ScaledQuantizedConvolution(std::shared_ptr<Node> input,
ScaledQuantizedConvolution(const std::shared_ptr<Node>& data_batch, std::shared_ptr<Node> filters,
const std::shared_ptr<Node>& filters, const Strides& window_movement_strides,
const Strides& window_movement_strides, const Strides& window_dilation_strides,
const Strides& window_dilation_strides, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_below, const CoordinateDiff& padding_above,
const CoordinateDiff& padding_above, const Strides& data_dilation_strides,
const Strides& data_dilation_strides, std::shared_ptr<Node> min_input,
const std::shared_ptr<Node> min_input, std::shared_ptr<Node> max_input,
const std::shared_ptr<Node> max_input, std::shared_ptr<Node> min_filter,
const std::shared_ptr<Node> min_filter, std::shared_ptr<Node> max_filter,
const std::shared_ptr<Node> max_filter, std::shared_ptr<Node> min_freezed_output,
const std::shared_ptr<Node> min_freezed_output, std::shared_ptr<Node> max_freezed_output);
const std::shared_ptr<Node> max_freezed_output);
std::shared_ptr<Node> ScaledQuantizedMaxPool(const std::shared_ptr<Node>& arg, std::shared_ptr<Node> ScaledQuantizedMaxPool(std::shared_ptr<Node> input,
const Shape& window_shape, const Shape& window_shape,
const Strides& window_movement_strides, const Strides& window_movement_strides,
const Shape& padding_below, const Shape& padding_below,
const Shape& padding_above, const Shape& padding_above,
const std::shared_ptr<Node> min, std::shared_ptr<Node> min,
const std::shared_ptr<Node> max); std::shared_ptr<Node> max);
} }
} }
This diff is collapsed.
...@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -45,11 +45,6 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
set_output_type(0, set_output_type(0,
element::i8, element::i8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
......
...@@ -41,7 +41,6 @@ namespace ngraph ...@@ -41,7 +41,6 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
float get_scale() const { return m_scale; }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
...@@ -51,7 +50,6 @@ namespace ngraph ...@@ -51,7 +50,6 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_scale;
}; };
} }
} }
...@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d ...@@ -49,10 +49,6 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
auto scale_val = scale_const_op->get_vector<float>();
this->m_scale = scale_val[0];
// TODO: call ngraph util // TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape()); // util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......
...@@ -47,7 +47,6 @@ namespace ngraph ...@@ -47,7 +47,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_bias() { return get_argument(2); } std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
...@@ -62,7 +61,6 @@ namespace ngraph ...@@ -62,7 +61,6 @@ namespace ngraph
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
bool m_with_relu; bool m_with_relu;
float m_scale;
}; };
} }
} }
...@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod ...@@ -44,10 +44,6 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const std::shared_ptr<Nod
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto scale_const_op = std::static_pointer_cast<ngraph::op::Constant>(scale);
float scale_val = *(static_cast<float const*>(scale_const_op->get_data_ptr()));
this->m_scale = scale_val;
set_output_type(0, set_output_type(0,
element::u8, element::u8,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
......
...@@ -43,7 +43,6 @@ namespace ngraph ...@@ -43,7 +43,6 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; } const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
float get_scale() const { return m_scale; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
...@@ -55,7 +54,6 @@ namespace ngraph ...@@ -55,7 +54,6 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
float m_scale;
}; };
} }
} }
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ngraph/op/negative.hpp" #include "ngraph/op/negative.hpp"
#include "ngraph/op/pad.hpp" #include "ngraph/op/pad.hpp"
#include "ngraph/op/quantize.hpp" #include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp" #include "ngraph/op/reshape.hpp"
#include "ngraph/op/subtract.hpp" #include "ngraph/op/subtract.hpp"
#include "ngraph/pattern/matcher.hpp" #include "ngraph/pattern/matcher.hpp"
...@@ -45,6 +46,7 @@ ...@@ -45,6 +46,7 @@
#include "ngraph/runtime/reference/negate.hpp" #include "ngraph/runtime/reference/negate.hpp"
#include "ngraph/runtime/reference/pad.hpp" #include "ngraph/runtime/reference/pad.hpp"
#include "ngraph/runtime/reference/quantize.hpp" #include "ngraph/runtime/reference/quantize.hpp"
#include "ngraph/runtime/reference/relu.hpp"
#include "ngraph/runtime/reference/reshape.hpp" #include "ngraph/runtime/reference/reshape.hpp"
#include "ngraph/runtime/reference/subtract.hpp" #include "ngraph/runtime/reference/subtract.hpp"
...@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary() ...@@ -378,7 +380,8 @@ void ngraph::pass::ConstantFolding::construct_constant_binary()
bool is_supported_unary_op(std::shared_ptr<Node> n) bool is_supported_unary_op(std::shared_ptr<Node> n)
{ {
return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n); return std::dynamic_pointer_cast<op::Abs>(n) || std::dynamic_pointer_cast<op::Negative>(n) ||
std::dynamic_pointer_cast<op::Relu>(n);
} }
template <class T> template <class T>
...@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant, ...@@ -398,6 +401,11 @@ shared_ptr<op::Constant> make_constant_unary(shared_ptr<op::Constant> constant,
runtime::reference::negate<T>( runtime::reference::negate<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape)); constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
} }
else if (std::dynamic_pointer_cast<op::Relu>(unary))
{
runtime::reference::relu<T>(
constant->get_vector<T>().data(), out_vec.data(), shape_size(out_shape));
}
else else
{ {
NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op"; NGRAPH_ASSERT(false) << "must be consistent with is_supported_unary_op";
......
...@@ -118,6 +118,7 @@ ...@@ -118,6 +118,7 @@
#include "ngraph/op/topk.hpp" #include "ngraph/op/topk.hpp"
#include "ngraph/pass/algebraic_simplification.hpp" #include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/common_function_collection.hpp" #include "ngraph/pass/common_function_collection.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/pass/core_fusion.hpp" #include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp" #include "ngraph/pass/cse.hpp"
#include "ngraph/pass/dump_sorted.hpp" #include "ngraph/pass/dump_sorted.hpp"
...@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma ...@@ -1049,6 +1050,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false); pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this); pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<ngraph::pass::ConstantFolding>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this); pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>( pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(
runtime::cpu::get_cse_handlers_map()); runtime::cpu::get_cse_handlers_map());
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp" #include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp" #include "ngraph/op/experimental/quantized_conv_bias.hpp"
...@@ -226,6 +227,16 @@ namespace ngraph ...@@ -226,6 +227,16 @@ namespace ngraph
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
{ {
auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolution scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -234,12 +245,21 @@ namespace ngraph ...@@ -234,12 +245,21 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolution*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{ {
auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcr->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution( return build_quantized_convolution(
data_desc, data_desc,
weights_desc, weights_desc,
...@@ -248,12 +268,21 @@ namespace ngraph ...@@ -248,12 +268,21 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>()) else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{ {
auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcb->get_arguments()[3]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
// conv+bias = cvt_to_int8(scale*(dst + bias)) // conv+bias = cvt_to_int8(scale*(dst + bias))
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2); auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution( return build_quantized_convolution(
...@@ -265,8 +294,7 @@ namespace ngraph ...@@ -265,8 +294,7 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above(), convolution->get_padding_above(),
(dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node)) scale_val[0],
->get_scale(),
ops); ops);
} }
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment