Commit b8106133 authored by Jayaram Bobba, committed by Scott Cyphers

More quantized fusion patterns (#2480)

* Add QuantizedConcat

* Remove unused variables and add check for size of mins and maxes vector

* Resolve conflicts

* Merged with master and addressed some PR feedback

* Maxpool and Avgpool fusions. Exclude Q from conv+relu fusion

* Remove single-user check from fusions

* Quantized concat fusion

* workaround: do reshape sinking by default

* style fix

* check scales for QuantizedConcat

* use compare_constants

* remove stale comment

* Handle all concat cases from arg size 2 to 6

* addressed feedback
parent 3863180d
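
The pattern behind all of these fusions is the same: an op that currently consumes a Dequantize result is rewritten to consume the quantized tensor directly, with the Dequantize re-applied to its output. A minimal sketch of the max-pool case, built with the same nGraph API the tests below use (op::QuantizedMaxPool and its constructor shape are assumptions here, mirroring op::MaxPool; this is an illustration, not code from the commit):

#include "ngraph/ngraph.hpp"

using namespace ngraph;

std::shared_ptr<Function> before_fusion()
{
    // Pattern targeted by the fusion: a Dequantize feeding a MaxPool.
    Shape shape{1, 2, 4, 4};
    auto input = std::make_shared<op::Parameter>(element::u8, shape);
    auto scale = op::Constant::create(element::f32, Shape{}, {2.0f});
    auto zero = op::Constant::create(element::u8, Shape{}, {0});
    auto dq = std::make_shared<op::Dequantize>(input, scale, zero, element::f32, AxisSet{});
    auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});
    // After CPUQuantFusion the pool runs on the u8 tensor and the Dequantize
    // moves below it (max commutes with the monotonic dequantize):
    //   auto q_pool = std::make_shared<op::QuantizedMaxPool>(input, Shape{2, 2}, ...);  // assumed ctor
    //   auto out = std::make_shared<op::Dequantize>(q_pool, scale, zero, element::f32, AxisSet{});
    return std::make_shared<Function>(NodeVector{maxpool}, ParameterVector{input});
}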
@@ -111,12 +111,18 @@ public:
    {
        construct_qconv_relu(true);
        construct_qconv_relu(false);
        construct_qavg_pool();
        construct_qmax_pool();
        construct_qconcat();
        construct_qconvb_add();
        construct_dq_q();
    }

private:
    void construct_qconv_relu(bool with_bias);
    void construct_qavg_pool();
    void construct_qmax_pool();
    void construct_qconcat();
    void construct_dq_q();
    void construct_qconvb_add();
};
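
Each construct_* call above registers one pattern matcher on this GraphRewrite pass. A schematic of what such a registration typically looks like, assuming the pattern::Matcher / pattern::op::Label API of this nGraph era; the label shapes and the callback body are illustrative, not the commit's actual implementation:

void construct_qmax_pool_sketch()  // hypothetical stand-in for construct_qmax_pool()
{
    // Labels act as wildcards for the quantized input and its scale/zero point.
    auto input = std::make_shared<pattern::op::Label>(element::u8, Shape{1, 2, 4, 4});
    auto scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
    auto zero = std::make_shared<pattern::op::Label>(element::u8, Shape{});
    auto dq = std::make_shared<op::Dequantize>(input, scale, zero, element::f32, AxisSet{});
    auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});

    auto callback = [input](pattern::Matcher& m) -> bool {
        auto pattern_map = m.get_pattern_map();
        // Verify the matched subgraph (element types, scales), build the
        // quantized op, call replace_node(...), and return true on success.
        return false;  // leave the graph untouched when it does not qualify
    };
    // this->add_matcher(std::make_shared<pattern::Matcher>(maxpool, callback));
}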

@@ -30,6 +30,7 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/get_output_element.hpp"

@@ -3604,6 +3605,134 @@ TEST(cpu_quant_fusion, qconvb_relu)
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}

TEST(cpu_quant_fusion, qavg_pool)
{
    auto make_function = []() {
        Shape shape_input{1, 2, 4, 4};
        auto input = std::make_shared<op::Parameter>(element::f32, shape_input);
        auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto weights_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
        // Build Quantize -> Dequantize -> AvgPool; CPUQuantFusion should sink
        // the Dequantize below the pool so the pooling runs on u8 data.
        auto q_input = std::make_shared<op::Quantize>(
            input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
        auto dq = std::make_shared<op::Dequantize>(
            q_input, input_scale, uint8_zero, element::f32, AxisSet{});
        auto avg_pool = std::make_shared<op::AvgPool>(dq, Shape{2, 2});
        return make_shared<Function>(NodeVector{avg_pool}, ParameterVector{input});
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(4.0f, 4.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    // Baseline run with CPUQuantFusion disabled, then a second run with the
    // pass enabled; the fused graph must produce the same results.
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}

TEST(cpu_quant_fusion, qmax_pool)
{
    auto make_function = []() {
        Shape shape_input{1, 2, 4, 4};
        auto input = std::make_shared<op::Parameter>(element::f32, shape_input);
        auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto weights_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
        // Quantize -> Dequantize -> MaxPool; the fusion should pool on u8 data.
        auto q_input = std::make_shared<op::Quantize>(
            input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
        auto dq = std::make_shared<op::Dequantize>(
            q_input, input_scale, uint8_zero, element::f32, AxisSet{});
        auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});
        return make_shared<Function>(NodeVector{maxpool}, ParameterVector{input});
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(1.0f, 10.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
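
Both pooling tests check only that the fused and unfused graphs agree numerically. One could additionally assert that the fusion actually fired, mirroring the count_ops_of_type check the qconcat test below uses; this would be a hypothetical extra assertion (not in the commit), and it assumes op::QuantizedMaxPool is the class the fusion emits:

// Hypothetical: after executing cpu_f2 with the pass enabled, the pooling
// op should have been replaced by its quantized counterpart.
// ASSERT_EQ(count_ops_of_type<op::QuantizedMaxPool>(cpu_f2), 1);
// ASSERT_EQ(count_ops_of_type<op::MaxPool>(cpu_f2), 0);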

TEST(cpu_quant_fusion, qconcat)
{
    auto make_function = []() {
        // Wraps each parameter in Quantize -> Dequantize so Concat sees
        // dequantized inputs that all share the same scale.
        auto get_input_slice = [](std::shared_ptr<op::Parameter>& input) {
            auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
            auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
            auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
            op::Quantize::RoundMode round_mode =
                op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
            auto q_input = std::make_shared<op::Quantize>(
                input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
            auto dq = std::make_shared<op::Dequantize>(
                q_input, input_scale, uint8_zero, element::f32, AxisSet{});
            return dq;
        };

        NodeVector concat_inputs, concats;
        ParameterVector inputs;
        Shape shape_input{1, 2, 4, 4};
        inputs.push_back(std::make_shared<op::Parameter>(element::f32, shape_input));
        concat_inputs.push_back(get_input_slice(inputs.back()));
        // Concat2 -- Concat7: concats with 2 through 7 inputs
        for (size_t i = 0; i < 6; i++)
        {
            inputs.push_back(std::make_shared<op::Parameter>(element::f32, shape_input));
            concat_inputs.push_back(get_input_slice(inputs.back()));
            concats.push_back(std::make_shared<op::Concat>(concat_inputs, 0));
        }
        return make_shared<Function>(concats, inputs);
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(2.0f, 2.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    // Expect Concat2 -- Concat6 to be fused and not Concat7: the pass only
    // handles concats with 2 to 6 inputs.
    ASSERT_EQ(count_ops_of_type<op::QuantizedConcat>(cpu_f2), 5);
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
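
Per the "check scales for QuantizedConcat" item in the commit message, the fusion also compares the Dequantize scale constants (via compare_constants) and bails out when they differ. A small sketch of a graph the scale check should reject, using only the API from the test above (the rejection behavior is inferred from the commit notes, not demonstrated by this test):

// Two dequantized inputs with mismatched scales: CPUQuantFusion should
// leave this Concat unfused because the scale constants differ.
Shape shape{1, 2, 4, 4};
auto round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
auto in0 = std::make_shared<op::Parameter>(element::f32, shape);
auto in1 = std::make_shared<op::Parameter>(element::f32, shape);
auto s0 = op::Constant::create(element::f32, Shape{}, {2.0f});
auto s1 = op::Constant::create(element::f32, Shape{}, {4.0f});  // differs from s0
auto dq0 = std::make_shared<op::Dequantize>(
    std::make_shared<op::Quantize>(in0, s0, uint8_zero, element::u8, AxisSet{}, round_mode),
    s0, uint8_zero, element::f32, AxisSet{});
auto dq1 = std::make_shared<op::Dequantize>(
    std::make_shared<op::Quantize>(in1, s1, uint8_zero, element::u8, AxisSet{}, round_mode),
    s1, uint8_zero, element::f32, AxisSet{});
auto concat = std::make_shared<op::Concat>(NodeVector{dq0, dq1}, 0);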

TEST(cpu_quant_fusion, dq_q)
{
    auto make_function = [](bool match_scales = true, bool match_et = true) {
...