Commit 90edb4f6 authored by Nishant Patel, committed by Scott Cyphers

Add support for QConvInteger (#2679)

parent 14b9bab2
...@@ -94,6 +94,26 @@ namespace ngraph ...@@ -94,6 +94,26 @@ namespace ngraph
requantization_scale, requantization_scale,
false); false);
} }
// Builds a quantized convolution whose result is the raw i32
// accumulation — no requantization is applied (presumably mirroring
// the ONNX ConvInteger contract; confirm against the importer).
shared_ptr<Node> QuantizedConvInteger(shared_ptr<Node> input,
                                      shared_ptr<Node> filter,
                                      const Strides& window_movement_strides,
                                      const Strides& window_dilation_strides,
                                      const CoordinateDiff& padding_below,
                                      const CoordinateDiff& padding_above,
                                      const Strides& data_dilation_strides)
{
    // A unit scale leaves the accumulated values untouched; passing
    // requantize = false makes the op emit the i32 accumulator type.
    const auto unit_scale = make_constant(element::f32, Shape{}, 1);
    return make_shared<op::QuantizedConvolution>(input,
                                                 filter,
                                                 window_movement_strides,
                                                 window_dilation_strides,
                                                 padding_below,
                                                 padding_above,
                                                 data_dilation_strides,
                                                 unit_scale,
                                                 /*requantize=*/false);
}
} }
} }
} }
...@@ -48,6 +48,14 @@ namespace ngraph ...@@ -48,6 +48,14 @@ namespace ngraph
std::shared_ptr<Node> input_scale, std::shared_ptr<Node> input_scale,
std::shared_ptr<Node> filter_scale, std::shared_ptr<Node> filter_scale,
std::shared_ptr<Node> output_scale); std::shared_ptr<Node> output_scale);
/// \brief Builds a quantized convolution with no requantization: the
///        output is the raw i32 accumulation of the quantized
///        input * filter products (scale fixed to 1 internally).
/// \param input   Quantized data batch node.
/// \param filter  Quantized filter node.
/// \param window_movement_strides Convolution window movement strides.
/// \param window_dilation_strides Filter dilation strides.
/// \param padding_below  Padding added below/before each spatial axis.
/// \param padding_above  Padding added above/after each spatial axis.
/// \param data_dilation_strides   Data (input) dilation strides.
/// \return A QuantizedConvolution node producing an i32 result.
std::shared_ptr<Node> QuantizedConvInteger(std::shared_ptr<Node> input,
                                           std::shared_ptr<Node> filter,
                                           const Strides& window_movement_strides,
                                           const Strides& window_dilation_strides,
                                           const CoordinateDiff& padding_below,
                                           const CoordinateDiff& padding_above,
                                           const Strides& data_dilation_strides);
} }
} }
} }
...@@ -30,13 +30,15 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -30,13 +30,15 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale) const std::shared_ptr<Node> scale,
const bool requantize)
: Op("QuantizedConvolution", check_single_output_args({data_batch, filters, scale})) : Op("QuantizedConvolution", check_single_output_args({data_batch, filters, scale}))
, m_window_movement_strides(window_movement_strides) , m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides) , m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below) , m_padding_below(padding_below)
, m_padding_above(padding_above) , m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides) , m_data_dilation_strides(data_dilation_strides)
, m_requantize(requantize)
{ {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
...@@ -45,8 +47,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc ...@@ -45,8 +47,10 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc
auto& data_batch_shape = data_batch->get_shape(); auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape(); auto& filters_shape = filters->get_shape();
auto output_et = requantize ? element::i8 : element::i32;
set_output_type(0, set_output_type(0,
element::i8, output_et,
util::infer_convolution_output_shape(this, util::infer_convolution_output_shape(this,
data_batch_shape, data_batch_shape,
filters_shape, filters_shape,
...@@ -76,5 +80,6 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector& ...@@ -76,5 +80,6 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector&
get_padding_below(), get_padding_below(),
get_padding_above(), get_padding_above(),
get_data_dilation_strides(), get_data_dilation_strides(),
new_args.at(2))); new_args.at(2),
m_requantize));
} }
...@@ -33,7 +33,8 @@ namespace ngraph ...@@ -33,7 +33,8 @@ namespace ngraph
const CoordinateDiff& padding_below, const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above, const CoordinateDiff& padding_above,
const Strides& data_dilation_strides, const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale); const std::shared_ptr<Node> scale,
const bool requantize = true);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; } const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; } const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; } const CoordinateDiff& get_padding_below() const { return m_padding_below; }
...@@ -41,6 +42,7 @@ namespace ngraph ...@@ -41,6 +42,7 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; } const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); } std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); } std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
bool requantize() const { return m_requantize; }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
...@@ -50,6 +52,7 @@ namespace ngraph ...@@ -50,6 +52,7 @@ namespace ngraph
CoordinateDiff m_padding_below; CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above; CoordinateDiff m_padding_above;
Strides m_data_dilation_strides; Strides m_data_dilation_strides;
bool m_requantize;
}; };
} }
} }
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "ngraph/builder/quantization.hpp" #include "ngraph/builder/quantization.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/pass/constant_folding.hpp" #include "ngraph/pass/constant_folding.hpp"
...@@ -187,6 +188,39 @@ TEST(builder, scaled_QC) ...@@ -187,6 +188,39 @@ TEST(builder, scaled_QC)
read_vector<int8_t>(result)); read_vector<int8_t>(result));
} }
// End-to-end check of the QuantizedConvInteger builder on the CPU
// backend: u8 input convolved with an i8 filter, unit strides and
// dilations, 1x1 padding; the result must be the raw i32 accumulation.
TEST(builder, scaled_QConvInteger)
{
    Shape input_shape{1, 1, 3, 4};
    Shape filter_shape{1, 1, 3, 3};
    Shape output_shape{1, 1, 3, 4};
    vector<uint8_t> input_values = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
    vector<int8_t> filter_values = {1, 2, 3, 4, 5, 0, 0, 1, 2};

    auto input = make_shared<op::Parameter>(element::u8, input_shape);
    auto filter = make_shared<op::Parameter>(element::i8, filter_shape);
    auto conv = ngraph::builder::quantization::QuantizedConvInteger(
        input,
        filter,
        Strides{1, 1},        // window movement strides
        Strides{1, 1},        // window dilation strides
        CoordinateDiff{1, 1}, // padding below
        CoordinateDiff{1, 1}, // padding above
        Strides{1, 1});       // data dilation strides
    auto f = make_shared<Function>(NodeVector{conv}, ParameterVector{input, filter});
    constant_fold(f);

    auto backend = runtime::Backend::create("CPU");

    // Bind concrete tensors for both parameters and the i32 result.
    auto a = backend->create_tensor(element::u8, input_shape);
    copy_data(a, input_values);
    auto b = backend->create_tensor(element::i8, filter_shape);
    copy_data(b, filter_values);
    auto result = backend->create_tensor(element::i32, output_shape);

    auto handle = backend->compile(f);
    handle->call_with_validate({result}, {a, b});
    EXPECT_EQ((vector<int32_t>{22, 34, 30, 32, 38, 72, 90, 43, 33, 52, 43, 39}),
              read_vector<int32_t>(result));
}
TEST(builder, dynamic_scaled_QC) TEST(builder, dynamic_scaled_QC)
{ {
Shape shape_a{1, 1, 3, 4}; // input shape Shape shape_a{1, 1, 3, 4}; // input shape
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment