Commit ed701920 authored by nishant.b.patel

Change the pattern in cpu_fusion to reflect the new QuantizedConvolution (QC) API

parent 90410792
......@@ -2063,14 +2063,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
this->add_matcher(m, callback);
}
#if 0
// QuantizedConvolution + Dequantize + Relu -> QuantizedConvolutionRelu + Dequantize
void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_bias)
{
Shape shape{2, 2, 1, 1};
auto data_batch = std::make_shared<pattern::op::Label>(element::u8, shape);
auto filters = std::make_shared<pattern::op::Label>(element::i8, shape);
auto input_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto filter_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto output_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto requantization_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
......@@ -2098,7 +2102,14 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requantization_scale);
input_scale,
uint8_zero,
filter_scale,
int8_zero,
output_scale,
int8_zero,
element::i8,
AxisSet{});
}
auto dq =
std::make_shared<ngraph::op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{});
......@@ -2154,6 +2165,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
{
auto qconv_m =
std::static_pointer_cast<ngraph::op::QuantizedConvolution>(dq_m->get_argument(0));
auto requantization_scale =
qconv_m->get_argument(2) * qconv_m->get_argument(4) / qconv_m->get_argument(6);
qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionRelu>(
qconv_m->get_argument(0),
qconv_m->get_argument(1),
......@@ -2162,7 +2175,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
qconv_m->get_padding_below(),
qconv_m->get_padding_above(),
qconv_m->get_data_dilation_strides(),
qconv_m->get_argument(2));
requantization_scale);
}
auto zp =
builder::make_constant<uint8_t>(element::u8, dq_m->get_argument(1)->get_shape(), 0);
......@@ -2183,7 +2196,6 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
}
this->add_matcher(m, callback);
}
#endif
// Dequantize + AvgPool -> QuantizedAvgPool + Dequantize
void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qavg_pool()
......
......@@ -117,8 +117,8 @@ public:
CPUQuantFusion()
: GraphRewrite()
{
//construct_qconv_relu(true);
//construct_qconv_relu(false);
construct_qconv_relu(true);
construct_qconv_relu(false);
construct_qavg_pool();
construct_qmax_pool();
construct_qconcat();
......@@ -128,7 +128,7 @@ public:
}
private:
// void construct_qconv_relu(bool with_bias);
void construct_qconv_relu(bool with_bias);
void construct_qavg_pool();
void construct_qmax_pool();
void construct_qconcat();
......
......@@ -3265,7 +3265,6 @@ TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu)
}
}
#if 0
TEST(cpu_quant_fusion, qconv_relu)
{
auto make_function = []() {
......@@ -3284,7 +3283,6 @@ TEST(cpu_quant_fusion, qconv_relu)
input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights = std::make_shared<op::Quantize>(
weights, weights_scale, int8_zero, element::i8, AxisSet{}, round_mode);
auto requant_scale = (input_scale * weights_scale) / output_scale;
auto conv = std::make_shared<op::QuantizedConvolution>(q_input,
q_weights,
Strides{1, 1},
......@@ -3292,7 +3290,14 @@ TEST(cpu_quant_fusion, qconv_relu)
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requant_scale);
input_scale,
uint8_zero,
weights_scale,
int8_zero,
output_scale,
int8_zero,
element::i8,
AxisSet{});
auto dq = std::make_shared<op::Dequantize>(
conv, output_scale, int8_zero, element::f32, AxisSet{});
auto relu = std::make_shared<op::Relu>(dq);
......@@ -3322,7 +3327,7 @@ TEST(cpu_quant_fusion, qconv_relu)
// Expected output - [2, 2, ...]
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
#endif
TEST(cpu_quant_fusion, qconvb_relu)
{
auto make_function = []() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment