Change the pattern in cpu_fusion to reflect the new QuantizedConvolution (QC) API

ed701920 · nishant.b.patel · 90410792 · ed701920 · ed701920 · ed701920
Commit ed701920 authored May 22, 2019 by nishant.b.patel
Hide whitespace changes
Inline Side-by-side

Showing with 28 additions and 11 deletions

cpu_fusion.cpp src/ngraph/runtime/cpu/pass/cpu_fusion.cpp +16 -4

cpu_fusion.hpp src/ngraph/runtime/cpu/pass/cpu_fusion.hpp +3 -3

cpu_fusion.cpp test/cpu_fusion.cpp +9 -4

No files found.
--- a/src/ngraph/runtime/cpu/pass/cpu_fusion.cpp
+++ b/src/ngraph/runtime/cpu/pass/cpu_fusion.cpp
@@ -2063,14 +2063,18 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
    this->add_matcher(m, callback);
 }
-#if 0
 // QuantizedConvolution + Dequantize + Relu -> QuantizedConvolutionRelu + Dequantize
 void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_bias)
 {
    Shape shape{2, 2, 1, 1};
    auto data_batch = std::make_shared<pattern::op::Label>(element::u8, shape);
    auto filters = std::make_shared<pattern::op::Label>(element::i8, shape);
+    auto input_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
+    auto filter_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
+    auto output_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
    auto requantization_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
+    auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
+    auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
    auto dq_scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
    auto dq_zp = std::make_shared<pattern::op::Label>(element::i8, Shape{});
@@ -2098,7 +2102,14 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
                                                                   CoordinateDiff{0, 0},
                                                                   CoordinateDiff{0, 0},
                                                                   Strides{1, 1},
-                                                                   requantization_scale);
+                                                                   input_scale,
+                                                                   uint8_zero,
+                                                                   filter_scale,
+                                                                   int8_zero,
+                                                                   output_scale,
+                                                                   int8_zero,
+                                                                   element::i8,
+                                                                   AxisSet{});
    }
    auto dq =
        std::make_shared<ngraph::op::Dequantize>(qconv, dq_scale, dq_zp, element::f32, AxisSet{});
@@ -2154,6 +2165,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
        {
            auto qconv_m =
                std::static_pointer_cast<ngraph::op::QuantizedConvolution>(dq_m->get_argument(0));
+            auto requantization_scale =
+                qconv_m->get_argument(2) * qconv_m->get_argument(4) / qconv_m->get_argument(6);
            qconv_n = std::make_shared<ngraph::op::QuantizedConvolutionRelu>(
                qconv_m->get_argument(0),
                qconv_m->get_argument(1),
@@ -2162,7 +2175,7 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
                qconv_m->get_padding_below(),
                qconv_m->get_padding_above(),
                qconv_m->get_data_dilation_strides(),
-                qconv_m->get_argument(2));
+                requantization_scale);
        }
        auto zp =
            builder::make_constant<uint8_t>(element::u8, dq_m->get_argument(1)->get_shape(), 0);
@@ -2183,7 +2196,6 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
    }
    this->add_matcher(m, callback);
 }
-#endif
 // Dequantize + AvgPool -> QuantizedAvgPool + Dequantize
 void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qavg_pool()

--- a/src/ngraph/runtime/cpu/pass/cpu_fusion.hpp
+++ b/src/ngraph/runtime/cpu/pass/cpu_fusion.hpp
@@ -117,8 +117,8 @@ public:
    CPUQuantFusion()
        : GraphRewrite()
    {
-        //construct_qconv_relu(true);
+        construct_qconv_relu(true);
-        //construct_qconv_relu(false);
+        construct_qconv_relu(false);
        construct_qavg_pool();
        construct_qmax_pool();
        construct_qconcat();
@@ -128,7 +128,7 @@ public:
    }
 private:
-    //    void construct_qconv_relu(bool with_bias);
+    void construct_qconv_relu(bool with_bias);
    void construct_qavg_pool();
    void construct_qmax_pool();
    void construct_qconcat();

--- a/test/cpu_fusion.cpp
+++ b/test/cpu_fusion.cpp
@@ -3265,7 +3265,6 @@ TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu)
    }
 }
-#if 0
 TEST(cpu_quant_fusion, qconv_relu)
 {
    auto make_function = []() {
@@ -3284,7 +3283,6 @@ TEST(cpu_quant_fusion, qconv_relu)
            input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
        auto q_weights = std::make_shared<op::Quantize>(
            weights, weights_scale, int8_zero, element::i8, AxisSet{}, round_mode);
-        auto requant_scale = (input_scale * weights_scale) / output_scale;
        auto conv = std::make_shared<op::QuantizedConvolution>(q_input,
                                                               q_weights,
                                                               Strides{1, 1},
@@ -3292,7 +3290,14 @@ TEST(cpu_quant_fusion, qconv_relu)
                                                               CoordinateDiff{0, 0},
                                                               CoordinateDiff{0, 0},
                                                               Strides{1, 1},
-                                                               requant_scale);
+                                                               input_scale,
+                                                               uint8_zero,
+                                                               weights_scale,
+                                                               int8_zero,
+                                                               output_scale,
+                                                               int8_zero,
+                                                               element::i8,
+                                                               AxisSet{});
        auto dq = std::make_shared<op::Dequantize>(
            conv, output_scale, int8_zero, element::f32, AxisSet{});
        auto relu = std::make_shared<op::Relu>(dq);
@@ -3322,7 +3327,7 @@ TEST(cpu_quant_fusion, qconv_relu)
    // Expected output - [2, 2, ...]
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
 }
-#endif
 TEST(cpu_quant_fusion, qconvb_relu)
 {
    auto make_function = []() {