Commit b9dc7fa9 authored by Amy Zhuang, committed by Scott Cyphers

Modify QuantizedConvolutionBias(Signed)Add fusion. (#3090)

* Modify QuantizedConvolutionBias(Signed)Add fusion.

* Add unit test.
parent 860d1e3a
...@@ -2222,6 +2222,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add() ...@@ -2222,6 +2222,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0)); std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]); auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]); auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);
// both left and right are QuantizedConvolutionBias
if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
{
for (auto user : m.get_match_root()->get_users())
{
auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
if (q_m)
{
auto q_m_scale = q_m->get_argument(1);
auto dq_l_m_scale = dq_l_m->get_argument(1);
auto dq_r_m_scale = dq_r_m->get_argument(1);
if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
ngraph::compare_constants(q_m_scale, dq_r_m_scale))
{
NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
// switch left and right branch
auto temp = dq_l_m;
dq_l_m = dq_r_m;
dq_r_m = temp;
}
break;
}
}
}
auto qconv = auto qconv =
std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0)); std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
auto inplace_input = dq_r_m->get_argument(0); auto inplace_input = dq_r_m->get_argument(0);
......
...@@ -3683,6 +3683,120 @@ TEST(cpu_quant_fusion, qconvba) ...@@ -3683,6 +3683,120 @@ TEST(cpu_quant_fusion, qconvba)
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0))); EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
} }
// Exercises CPUQuantFusion on a graph whose Add has two dequantized
// QuantizedConvolutionBias inputs, where the Quantize that follows the Relu is
// built with the right branch's output scale rather than the left branch's.
// The test compares results with the pass disabled and enabled, then checks how
// many Quantize ops remain after compiling on the CPU backend.
TEST(cpu_quant_fusion, qconvba_q)
{
    auto build_graph = []() {
        const Shape data_shape{1, 2, 2, 2};
        const Shape filter_shape{1, 2, 1, 1};
        const Shape bias_shape{filter_shape[0]};

        // Graph inputs: (data, filters, bias) for the left and the right convolution.
        auto data_l = std::make_shared<op::Parameter>(element::f32, data_shape);
        auto filters_l = std::make_shared<op::Parameter>(element::f32, filter_shape);
        auto bias_l = std::make_shared<op::Parameter>(element::f32, bias_shape);
        auto data_r = std::make_shared<op::Parameter>(element::f32, data_shape);
        auto filters_r = std::make_shared<op::Parameter>(element::f32, filter_shape);
        auto bias_r = std::make_shared<op::Parameter>(element::f32, bias_shape);

        // Scalar scales; the two branches deliberately use different values.
        auto data_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto filter_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto out_scale_l = op::Constant::create(element::f32, Shape{}, {4.0f});
        auto data_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
        auto filter_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
        auto out_scale_r = op::Constant::create(element::f32, Shape{}, {20.0f});

        // Zero points for each quantized element type.
        auto zero_i8 = op::Constant::create(element::i8, Shape{}, {0});
        auto zero_i32 = op::Constant::create(element::i32, Shape{}, {0});
        auto zero_u8 = op::Constant::create(element::u8, Shape{}, {0});

        const op::Quantize::RoundMode rounding =
            op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;

        // Quantize with no per-axis scaling and the shared rounding mode.
        auto quantize = [&rounding](const std::shared_ptr<Node>& value,
                                    const std::shared_ptr<Node>& scale,
                                    const std::shared_ptr<Node>& zero_point,
                                    const element::Type& target_type) {
            return std::make_shared<op::Quantize>(
                value, scale, zero_point, target_type, AxisSet{}, rounding);
        };

        auto q_data_l = quantize(data_l, data_scale_l, zero_u8, element::u8);
        auto q_filters_l = quantize(filters_l, filter_scale_l, zero_i8, element::i8);
        auto q_bias_l = quantize(bias_l, data_scale_l * filter_scale_l, zero_i32, element::i32);
        auto q_data_r = quantize(data_r, data_scale_r, zero_u8, element::u8);
        auto q_filters_r = quantize(filters_r, filter_scale_r, zero_i8, element::i8);
        auto q_bias_r = quantize(bias_r, data_scale_r * filter_scale_r, zero_i32, element::i32);

        // One branch: QuantizedConvolutionBias -> Dequantize -> Reshape -> Broadcast.
        auto conv_branch = [&zero_i8](const std::shared_ptr<Node>& q_data,
                                      const std::shared_ptr<Node>& q_filters,
                                      const std::shared_ptr<Node>& q_bias,
                                      const std::shared_ptr<Node>& data_scale,
                                      const std::shared_ptr<Node>& filter_scale,
                                      const std::shared_ptr<Node>& out_scale)
            -> std::shared_ptr<Node> {
            auto requant_scale = (data_scale * filter_scale) / out_scale;
            auto conv = std::make_shared<op::QuantizedConvolutionBias>(q_data,
                                                                       q_filters,
                                                                       q_bias,
                                                                       Strides{1, 1},
                                                                       Strides{1, 1},
                                                                       CoordinateDiff{0, 0},
                                                                       CoordinateDiff{0, 0},
                                                                       Strides{1, 1},
                                                                       requant_scale);
            auto dequantized = std::make_shared<op::Dequantize>(
                conv, out_scale, zero_i8, element::f32, AxisSet{});
            auto reshaped = std::make_shared<op::Reshape>(
                dequantized, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
            return std::make_shared<op::Broadcast>(reshaped, Shape{1, 1, 2, 2}, AxisSet{0});
        };

        // Left branch is built first, then the right branch.
        auto branch_l = conv_branch(
            q_data_l, q_filters_l, q_bias_l, data_scale_l, filter_scale_l, out_scale_l);
        auto branch_r = conv_branch(
            q_data_r, q_filters_r, q_bias_r, data_scale_r, filter_scale_r, out_scale_r);

        // Add -> Relu -> Quantize -> Dequantize, where the trailing Quantize and
        // Dequantize both use the right branch's output scale.
        auto sum = branch_l + branch_r;
        auto activated = std::make_shared<op::Relu>(sum);
        auto requantized = quantize(activated, out_scale_r, zero_u8, element::u8);
        auto result = std::make_shared<op::Dequantize>(
            requantized, out_scale_r, zero_u8, element::f32, AxisSet{});

        return std::make_shared<Function>(
            NodeVector{result},
            ParameterVector{data_l, filters_l, bias_l, data_r, filters_r, bias_r});
    };

    auto reference_fn = build_graph();
    auto fused_fn = build_graph();

    // One input tensor per parameter, filled by the Uniform(2.0f, 2.0f) generator.
    test::Uniform<float> rng(2.0f, 2.0f);
    std::vector<std::vector<float>> args;
    for (const auto& param : reference_fn->get_parameters())
    {
        std::vector<float> values(shape_size(param->get_shape()));
        rng.initialize(values);
        args.push_back(values);
    }

    // Reference run with CPUQuantFusion disabled.
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto reference_results = execute(reference_fn, args, "CPU");

    // Same graph with CPUQuantFusion enabled.
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto fused_results = execute(fused_fn, args, "CPU");

    EXPECT_TRUE(test::all_close(reference_results.at(0), fused_results.at(0)));

    // Compile a fresh copy and count the Quantize ops left in the graph.
    auto cpu_backend = runtime::Backend::create("CPU");
    auto compiled_fn = build_graph();
    cpu_backend->compile(compiled_fn);
    ASSERT_EQ(count_ops_of_type<op::Quantize>(compiled_fn), 6);
}
#ifndef NGRAPH_JSON_DISABLE #ifndef NGRAPH_JSON_DISABLE
// Tests that rely on deserializing json files // Tests that rely on deserializing json files
TEST(cpu_fusion, fuse_conv_bias) TEST(cpu_fusion, fuse_conv_bias)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment