Commit ed701920 authored by nishant.b.patel

Change the pattern in cpu_fusion to reflect the new QuantizedConvolution (QC) API

parent 90410792
...@@ -2063,14 +2063,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice() ...@@ -2063,14 +2063,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
this->add_matcher(m, callback); this->add_matcher(m, callback);
} }
#if 0
// QuantizedConvolution + Dequantize + Relu -> QuantizedConvolutionRelu + Dequantize // QuantizedConvolution + Dequantize + Relu -> QuantizedConvolutionRelu + Dequantize
void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_bias) void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_bias)
{ {
Shape shape{2, 2, 1, 1}; Shape shape{2, 2, 1, 1};
auto data_batch = std::make_shared<pattern::op::Label>(element::u8, shape); auto data_batch = std::make_shared<pattern::op::Label>(element::u8, shape);
auto filters = std::make_shared<pattern::op::Label>(element::i8, shape); auto filters = std::make_shared<pattern::op::Label>(element::i8, shape);
auto input_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto filter_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto output_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto requantization_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{}); auto requantization_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{}); auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{}); auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
...@@ -2098,7 +2102,14 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_ ...@@ -2098,7 +2102,14 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
CoordinateDiff{0, 0}, CoordinateDiff{0, 0},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0},
Strides{1, 1}, Strides{1, 1},
requantization_scale); input_scale,
uint8_zero,
filter_scale,
int8_zero,
output_scale,
int8_zero,
element::i8,
AxisSet{});
} }
auto dq = auto dq =
std::make_shared<ngraph::op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{}); std::make_shared<ngraph::op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{});
...@@ -2154,6 +2165,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_ ...@@ -2154,6 +2165,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
{ {
auto qconv_m = auto qconv_m =
std::static_pointer_cast<ngraph::op::QuantizedConvolution>(dq_m->get_argument(0)); std::static_pointer_cast<ngraph::op::QuantizedConvolution>(dq_m->get_argument(0));
auto requantization_scale =
qconv_m->get_argument(2) * qconv_m->get_argument(4) / qconv_m->get_argument(6);
qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionRelu>( qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionRelu>(
qconv_m->get_argument(0), qconv_m->get_argument(0),
qconv_m->get_argument(1), qconv_m->get_argument(1),
...@@ -2162,7 +2175,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_ ...@@ -2162,7 +2175,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
qconv_m->get_padding_below(), qconv_m->get_padding_below(),
qconv_m->get_padding_above(), qconv_m->get_padding_above(),
qconv_m->get_data_dilation_strides(), qconv_m->get_data_dilation_strides(),
qconv_m->get_argument(2)); requantization_scale);
} }
auto zp = auto zp =
builder::make_constant<uint8_t>(element::u8, dq_m->get_argument(1)->get_shape(), 0); builder::make_constant<uint8_t>(element::u8, dq_m->get_argument(1)->get_shape(), 0);
...@@ -2183,7 +2196,6 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_ ...@@ -2183,7 +2196,6 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
} }
this->add_matcher(m, callback); this->add_matcher(m, callback);
} }
#endif
// Dequantize + AvgPool -> QuantizedAvgPool + Dequantize // Dequantize + AvgPool -> QuantizedAvgPool + Dequantize
void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qavg_pool() void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qavg_pool()
......
...@@ -117,8 +117,8 @@ public: ...@@ -117,8 +117,8 @@ public:
CPUQuantFusion() CPUQuantFusion()
: GraphRewrite() : GraphRewrite()
{ {
//construct_qconv_relu(true); construct_qconv_relu(true);
//construct_qconv_relu(false); construct_qconv_relu(false);
construct_qavg_pool(); construct_qavg_pool();
construct_qmax_pool(); construct_qmax_pool();
construct_qconcat(); construct_qconcat();
...@@ -128,7 +128,7 @@ public: ...@@ -128,7 +128,7 @@ public:
} }
private: private:
// void construct_qconv_relu(bool with_bias); void construct_qconv_relu(bool with_bias);
void construct_qavg_pool(); void construct_qavg_pool();
void construct_qmax_pool(); void construct_qmax_pool();
void construct_qconcat(); void construct_qconcat();
......
...@@ -3265,7 +3265,6 @@ TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu) ...@@ -3265,7 +3265,6 @@ TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu)
} }
} }
#if 0
TEST(cpu_quant_fusion, qconv_relu) TEST(cpu_quant_fusion, qconv_relu)
{ {
auto make_function = []() { auto make_function = []() {
...@@ -3284,7 +3283,6 @@ TEST(cpu_quant_fusion, qconv_relu) ...@@ -3284,7 +3283,6 @@ TEST(cpu_quant_fusion, qconv_relu)
input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode); input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights = std::make_shared<op::Quantize>( auto q_weights = std::make_shared<op::Quantize>(
weights, weights_scale, int8_zero, element::i8, AxisSet{}, round_mode); weights, weights_scale, int8_zero, element::i8, AxisSet{}, round_mode);
auto requant_scale = (input_scale * weights_scale) / output_scale;
auto conv = std::make_shared<op::QuantizedConvolution>(q_input, auto conv = std::make_shared<op::QuantizedConvolution>(q_input,
q_weights, q_weights,
Strides{1, 1}, Strides{1, 1},
...@@ -3292,7 +3290,14 @@ TEST(cpu_quant_fusion, qconv_relu) ...@@ -3292,7 +3290,14 @@ TEST(cpu_quant_fusion, qconv_relu)
CoordinateDiff{0, 0}, CoordinateDiff{0, 0},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0},
Strides{1, 1}, Strides{1, 1},
requant_scale); input_scale,
uint8_zero,
weights_scale,
int8_zero,
output_scale,
int8_zero,
element::i8,
AxisSet{});
auto dq = std::make_shared<op::Dequantize>( auto dq = std::make_shared<op::Dequantize>(
conv, output_scale, int8_zero, element::f32, AxisSet{}); conv, output_scale, int8_zero, element::f32, AxisSet{});
auto relu = std::make_shared<op::Relu>(dq); auto relu = std::make_shared<op::Relu>(dq);
...@@ -3322,7 +3327,7 @@ TEST(cpu_quant_fusion, qconv_relu) ...@@ -3322,7 +3327,7 @@ TEST(cpu_quant_fusion, qconv_relu)
// Expected output - [2, 2, ...] // Expected output - [2, 2, ...]
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0))); EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
} }
#endif
TEST(cpu_quant_fusion, qconvb_relu) TEST(cpu_quant_fusion, qconvb_relu)
{ {
auto make_function = []() { auto make_function = []() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment