Commit b8106133 authored by Jayaram Bobba, committed by Scott Cyphers

More quantized fusion patterns (#2480)

* Add QuantizedConcat

* Remove unused variables and add check for size of mins and maxes vector

* Resolve conflicts

* Merged with master and addressed some PR feedback

* Maxpool and Avgpool fusions. Exclude Q from conv+relu fusion

* Remove single-user check from fusions

* Quantized concat fusion

* workaround: do reshape sinking by default

* style fix

* check scales for QuantizedConcat

* use compare_constants

* remove stale comment

* Handle all concat cases from arg size 2 to 6

* addressed feedback
parent 3863180d
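
The pattern behind all of these fusions is the same: an op that currently consumes a Dequantize result is rewritten to consume the quantized tensor directly, with the Dequantize re-applied to its output. A minimal sketch of the max-pool case, built with the same nGraph API the tests below use (op::QuantizedMaxPool and its constructor shape are assumptions here, mirroring op::MaxPool; this is an illustration, not code from the commit):

#include "ngraph/ngraph.hpp"

using namespace ngraph;

std::shared_ptr<Function> before_fusion()
{
    // Pattern targeted by the fusion: a Dequantize feeding a MaxPool.
    Shape shape{1, 2, 4, 4};
    auto input = std::make_shared<op::Parameter>(element::u8, shape);
    auto scale = op::Constant::create(element::f32, Shape{}, {2.0f});
    auto zero = op::Constant::create(element::u8, Shape{}, {0});
    auto dq = std::make_shared<op::Dequantize>(input, scale, zero, element::f32, AxisSet{});
    auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});
    // After CPUQuantFusion the pool runs on the u8 tensor and the Dequantize
    // moves below it (max commutes with the monotonic dequantize):
    //   auto q_pool = std::make_shared<op::QuantizedMaxPool>(input, Shape{2, 2}, ...);  // assumed ctor
    //   auto out = std::make_shared<op::Dequantize>(q_pool, scale, zero, element::f32, AxisSet{});
    return std::make_shared<Function>(NodeVector{maxpool}, ParameterVector{input});
}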
@@ -111,12 +111,18 @@ public:
    {
        construct_qconv_relu(true);
        construct_qconv_relu(false);
        construct_qavg_pool();
        construct_qmax_pool();
        construct_qconcat();
        construct_qconvb_add();
        construct_dq_q();
    }

private:
    void construct_qconv_relu(bool with_bias);
    void construct_qavg_pool();
    void construct_qmax_pool();
    void construct_qconcat();
    void construct_dq_q();
    void construct_qconvb_add();
};
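
Each construct_* call above registers one pattern matcher on this GraphRewrite pass. A schematic of what such a registration typically looks like, assuming the pattern::Matcher / pattern::op::Label API of this nGraph era; the label shapes and the callback body are illustrative, not the commit's actual implementation:

void construct_qmax_pool_sketch()  // hypothetical stand-in for construct_qmax_pool()
{
    // Labels act as wildcards for the quantized input and its scale/zero point.
    auto input = std::make_shared<pattern::op::Label>(element::u8, Shape{1, 2, 4, 4});
    auto scale = std::make_shared<pattern::op::Label>(element::f32, Shape{});
    auto zero = std::make_shared<pattern::op::Label>(element::u8, Shape{});
    auto dq = std::make_shared<op::Dequantize>(input, scale, zero, element::f32, AxisSet{});
    auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});

    auto callback = [input](pattern::Matcher& m) -> bool {
        auto pattern_map = m.get_pattern_map();
        // Verify the matched subgraph (element types, scales), build the
        // quantized op, call replace_node(...), and return true on success.
        return false;  // leave the graph untouched when it does not qualify
    };
    // this->add_matcher(std::make_shared<pattern::Matcher>(maxpool, callback));
}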

@@ -30,6 +30,7 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/get_output_element.hpp"

@@ -3604,6 +3605,134 @@ TEST(cpu_quant_fusion, qconvb_relu)
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}

TEST(cpu_quant_fusion, qavg_pool)
{
    auto make_function = []() {
        Shape shape_input{1, 2, 4, 4};
        auto input = std::make_shared<op::Parameter>(element::f32, shape_input);
        auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto weights_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
        // Build Quantize -> Dequantize -> AvgPool; CPUQuantFusion should sink
        // the Dequantize below the pool so the pooling runs on u8 data.
        auto q_input = std::make_shared<op::Quantize>(
            input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
        auto dq = std::make_shared<op::Dequantize>(
            q_input, input_scale, uint8_zero, element::f32, AxisSet{});
        auto avg_pool = std::make_shared<op::AvgPool>(dq, Shape{2, 2});
        return make_shared<Function>(NodeVector{avg_pool}, ParameterVector{input});
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(4.0f, 4.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    // Baseline run with CPUQuantFusion disabled, then a second run with the
    // pass enabled; the fused graph must produce the same results.
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}

TEST(cpu_quant_fusion, qmax_pool)
{
    auto make_function = []() {
        Shape shape_input{1, 2, 4, 4};
        auto input = std::make_shared<op::Parameter>(element::f32, shape_input);
        auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto weights_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
        // Quantize -> Dequantize -> MaxPool; the fusion should pool on u8 data.
        auto q_input = std::make_shared<op::Quantize>(
            input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
        auto dq = std::make_shared<op::Dequantize>(
            q_input, input_scale, uint8_zero, element::f32, AxisSet{});
        auto maxpool = std::make_shared<op::MaxPool>(dq, Shape{2, 2});
        return make_shared<Function>(NodeVector{maxpool}, ParameterVector{input});
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(1.0f, 10.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
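
Both pooling tests check only that the fused and unfused graphs agree numerically. One could additionally assert that the fusion actually fired, mirroring the count_ops_of_type check the qconcat test below uses; this would be a hypothetical extra assertion (not in the commit), and it assumes op::QuantizedMaxPool is the class the fusion emits:

// Hypothetical: after executing cpu_f2 with the pass enabled, the pooling
// op should have been replaced by its quantized counterpart.
// ASSERT_EQ(count_ops_of_type<op::QuantizedMaxPool>(cpu_f2), 1);
// ASSERT_EQ(count_ops_of_type<op::MaxPool>(cpu_f2), 0);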

TEST(cpu_quant_fusion, qconcat)
{
    auto make_function = []() {
        // Wraps each parameter in Quantize -> Dequantize so Concat sees
        // dequantized inputs that all share the same scale.
        auto get_input_slice = [](std::shared_ptr<op::Parameter>& input) {
            auto input_scale = op::Constant::create(element::f32, Shape{}, {2.0f});
            auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
            auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
            op::Quantize::RoundMode round_mode =
                op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
            auto q_input = std::make_shared<op::Quantize>(
                input, input_scale, uint8_zero, element::u8, AxisSet{}, round_mode);
            auto dq = std::make_shared<op::Dequantize>(
                q_input, input_scale, uint8_zero, element::f32, AxisSet{});
            return dq;
        };

        NodeVector concat_inputs, concats;
        ParameterVector inputs;
        Shape shape_input{1, 2, 4, 4};
        inputs.push_back(std::make_shared<op::Parameter>(element::f32, shape_input));
        concat_inputs.push_back(get_input_slice(inputs.back()));
        // Concat2 -- Concat7: concats with 2 through 7 inputs
        for (size_t i = 0; i < 6; i++)
        {
            inputs.push_back(std::make_shared<op::Parameter>(element::f32, shape_input));
            concat_inputs.push_back(get_input_slice(inputs.back()));
            concats.push_back(std::make_shared<op::Concat>(concat_inputs, 0));
        }
        return make_shared<Function>(concats, inputs);
    };

    auto cpu_f1 = make_function();
    auto cpu_f2 = make_function();

    test::Uniform<float> rng(2.0f, 2.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    // Expect Concat2 -- Concat6 to be fused and not Concat7: the pass only
    // handles concats with 2 to 6 inputs.
    ASSERT_EQ(count_ops_of_type<op::QuantizedConcat>(cpu_f2), 5);
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
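
Per the "check scales for QuantizedConcat" item in the commit message, the fusion also compares the Dequantize scale constants (via compare_constants) and bails out when they differ. A small sketch of a graph the scale check should reject, using only the API from the test above (the rejection behavior is inferred from the commit notes, not demonstrated by this test):

// Two dequantized inputs with mismatched scales: CPUQuantFusion should
// leave this Concat unfused because the scale constants differ.
Shape shape{1, 2, 4, 4};
auto round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
auto in0 = std::make_shared<op::Parameter>(element::f32, shape);
auto in1 = std::make_shared<op::Parameter>(element::f32, shape);
auto s0 = op::Constant::create(element::f32, Shape{}, {2.0f});
auto s1 = op::Constant::create(element::f32, Shape{}, {4.0f});  // differs from s0
auto dq0 = std::make_shared<op::Dequantize>(
    std::make_shared<op::Quantize>(in0, s0, uint8_zero, element::u8, AxisSet{}, round_mode),
    s0, uint8_zero, element::f32, AxisSet{});
auto dq1 = std::make_shared<op::Dequantize>(
    std::make_shared<op::Quantize>(in1, s1, uint8_zero, element::u8, AxisSet{}, round_mode),
    s1, uint8_zero, element::f32, AxisSet{});
auto concat = std::make_shared<op::Concat>(NodeVector{dq0, dq1}, 0);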

TEST(cpu_quant_fusion, dq_q)
{
    auto make_function = [](bool match_scales = true, bool match_et = true) {
...