Unverified commit 0028f173 authored by Scott Cyphers, committed by GitHub

Merge branch 'master' into pruthvi/mutex_for_cpu_backend

parents 351d09c0 b9dc7fa9
@@ -19,7 +19,7 @@ import test
 def pytest_addoption(parser):
     parser.addoption('--backend', default='INTERPRETER',
-                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML'],
+                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML', 'INTELGPU'],
                      help='Select from available backends')
@@ -31,20 +31,25 @@ def pytest_configure(config):
 def pytest_collection_modifyitems(config, items):
     backend_name = config.getvalue('backend')
 
-    gpu_skip = pytest.mark.skip(reason='Skipping test on the GPU backend.')
-    cpu_skip = pytest.mark.skip(reason='Skipping test on the CPU backend.')
-    nnp_skip = pytest.mark.skip(reason='Skipping test on the NNP backend.')
-    interpreter_skip = pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.')
-    plaidml_skip = pytest.mark.skip(reason='Skipping test on the PlaidML backend.')
+    keywords = {
+        'GPU': 'skip_on_gpu',
+        'CPU': 'skip_on_cpu',
+        'NNP': 'skip_on_nnp',
+        'INTERPRETER': 'skip_on_interpreter',
+        'PlaidML': 'skip_on_plaidml',
+        'INTELGPU': 'skip_on_intelgpu',
+    }
+
+    skip_markers = {
+        'GPU': pytest.mark.skip(reason='Skipping test on the GPU backend.'),
+        'CPU': pytest.mark.skip(reason='Skipping test on the CPU backend.'),
+        'NNP': pytest.mark.skip(reason='Skipping test on the NNP backend.'),
+        'INTERPRETER': pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.'),
+        'PlaidML': pytest.mark.skip(reason='Skipping test on the PlaidML backend.'),
+        'INTELGPU': pytest.mark.skip(reason='Skipping test on the INTELGPU backend.'),
+    }
 
     for item in items:
-        if backend_name == 'GPU' and 'skip_on_gpu' in item.keywords:
-            item.add_marker(gpu_skip)
-        if backend_name == 'CPU' and 'skip_on_cpu' in item.keywords:
-            item.add_marker(cpu_skip)
-        if backend_name == 'NNP' and 'skip_on_nnp' in item.keywords:
-            item.add_marker(nnp_skip)
-        if backend_name == 'INTERPRETER' and 'skip_on_interpreter' in item.keywords:
-            item.add_marker(interpreter_skip)
-        if backend_name == 'PlaidML' and 'skip_on_plaidml' in item.keywords:
-            item.add_marker(plaidml_skip)
+        skip_this_backend = keywords[backend_name]
+        if skip_this_backend in item.keywords:
+            item.add_marker(skip_markers[backend_name])
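For reference, a minimal sketch of how a test opts into these skips (the marker names come from the keywords table above; the test itself is hypothetical):

    import pytest

    @pytest.mark.skip_on_gpu       # skipped when run with --backend=GPU
    @pytest.mark.skip_on_intelgpu  # skipped when run with --backend=INTELGPU
    def test_my_op():
        assert True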
@@ -33,7 +33,6 @@ from test.ngraph.util import run_op_numeric_data, run_op_node
     (ng.exp, np.exp, -100., 100.),
     (ng.floor, np.floor, -100., 100.),
     (ng.log, np.log, 0, 100.),
-    (ng.logical_not, np.logical_not, -10, 10),
     (ng.relu, lambda x: np.maximum(0, x), -100., 100.),
     (ng.sign, np.sign, -100., 100.),
     (ng.sin, np.sin, -100., 100.),
@@ -68,7 +67,6 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
     (ng.exp, np.exp, np.float32(1.5)),
     (ng.floor, np.floor, np.float32(1.5)),
     (ng.log, np.log, np.float32(1.5)),
-    (ng.logical_not, np.logical_not, np.int32(0)),
     (ng.relu, lambda x: np.maximum(0, x), np.float32(-0.125)),
     (ng.sign, np.sign, np.float32(0.)),
     (ng.sin, np.sin, np.float32(np.pi / 4.0)),
@@ -86,3 +84,19 @@ def test_unary_op_scalar(ng_api_fn, numpy_fn, input_data):
     result = run_op_numeric_data(input_data, ng_api_fn)
     assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize('input_data', [
+    (np.array([True, False, True, False])),
+    (np.array(True)),
+    (np.array(False)),
+])
+@pytest.mark.skip_on_gpu
+def test_logical_not(input_data):
+    expected = np.logical_not(input_data)
+
+    result = run_op_node([input_data], ng.logical_not)[0]
+    assert np.array_equal(result, expected)
+
+    result = run_op_numeric_data(input_data, ng.logical_not)[0]
+    assert np.array_equal(result, expected)
@@ -818,6 +818,7 @@ def test_slice():
 @pytest.mark.skip_on_gpu
+@pytest.mark.skip_on_intelgpu
 def test_replace_slice():
     element_type = Type.f32
......
@@ -2222,6 +2222,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
             std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
         auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
         auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);
+
+        // The pattern guarantees that the left branch is a QuantizedConvolutionBias;
+        // check whether the right branch is one as well.
+        if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
+        {
+            for (auto user : m.get_match_root()->get_users())
+            {
+                auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
+                if (q_m)
+                {
+                    auto q_m_scale = q_m->get_argument(1);
+                    auto dq_l_m_scale = dq_l_m->get_argument(1);
+                    auto dq_r_m_scale = dq_r_m->get_argument(1);
+                    if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
+                        ngraph::compare_constants(q_m_scale, dq_r_m_scale))
+                    {
+                        NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
+                        // switch the left and right branches
+                        auto temp = dq_l_m;
+                        dq_l_m = dq_r_m;
+                        dq_r_m = temp;
+                    }
+                    break;
+                }
+            }
+        }
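+
+        // After the possible swap, dq_l_m feeds the QuantizedConvolutionBias that
+        // absorbs the add, and dq_r_m supplies the in-place summand used below.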
         auto qconv =
             std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
         auto inplace_input = dq_r_m->get_argument(0);
......
@@ -3683,6 +3683,120 @@ TEST(cpu_quant_fusion, qconvba)
     EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
 }
+
+TEST(cpu_quant_fusion, qconvba_q)
+{
+    auto make_function = []() {
+        Shape shape_input{1, 2, 2, 2};
+        Shape shape_weights{1, 2, 1, 1};
+        Shape shape_summand{1, 1, 2, 2};
+        auto input_l = std::make_shared<op::Parameter>(element::f32, shape_input);
+        auto weights_l = std::make_shared<op::Parameter>(element::f32, shape_weights);
+        auto bias_l = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
+        auto input_r = std::make_shared<op::Parameter>(element::f32, shape_input);
+        auto weights_r = std::make_shared<op::Parameter>(element::f32, shape_weights);
+        auto bias_r = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
+
+        auto input_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
+        auto weights_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
+        auto output_scale_l = op::Constant::create(element::f32, Shape{}, {4.0f});
+        auto input_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
+        auto weights_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
+        auto output_scale_r = op::Constant::create(element::f32, Shape{}, {20.0f});
+        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
+        auto int32_zero = op::Constant::create(element::i32, Shape{}, {0});
+        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
+
+        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
+        auto q_input_l = std::make_shared<op::Quantize>(
+            input_l, input_scale_l, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto q_weights_l = std::make_shared<op::Quantize>(
+            weights_l, weights_scale_l, int8_zero, element::i8, AxisSet{}, round_mode);
+        auto q_bias_l = std::make_shared<op::Quantize>(bias_l,
+                                                       input_scale_l * weights_scale_l,
+                                                       int32_zero,
+                                                       element::i32,
+                                                       AxisSet{},
+                                                       round_mode);
+        auto q_input_r = std::make_shared<op::Quantize>(
+            input_r, input_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto q_weights_r = std::make_shared<op::Quantize>(
+            weights_r, weights_scale_r, int8_zero, element::i8, AxisSet{}, round_mode);
+        auto q_bias_r = std::make_shared<op::Quantize>(bias_r,
+                                                       input_scale_r * weights_scale_r,
+                                                       int32_zero,
+                                                       element::i32,
+                                                       AxisSet{},
+                                                       round_mode);
+
+        // Left Graph
+        auto requant_scale_l = (input_scale_l * weights_scale_l) / output_scale_l;
+        auto conv_l = std::make_shared<op::QuantizedConvolutionBias>(q_input_l,
+                                                                     q_weights_l,
+                                                                     q_bias_l,
+                                                                     Strides{1, 1},
+                                                                     Strides{1, 1},
+                                                                     CoordinateDiff{0, 0},
+                                                                     CoordinateDiff{0, 0},
+                                                                     Strides{1, 1},
+                                                                     requant_scale_l);
+        auto dq_l = std::make_shared<op::Dequantize>(
+            conv_l, output_scale_l, int8_zero, element::f32, AxisSet{});
+        auto r_l = std::make_shared<op::Reshape>(dq_l, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
+        auto b_l = std::make_shared<op::Broadcast>(r_l, Shape{1, 1, 2, 2}, AxisSet{0});
+
+        // Right Graph
+        auto requant_scale_r = (input_scale_r * weights_scale_r) / output_scale_r;
+        auto conv_r = std::make_shared<op::QuantizedConvolutionBias>(q_input_r,
+                                                                     q_weights_r,
+                                                                     q_bias_r,
+                                                                     Strides{1, 1},
+                                                                     Strides{1, 1},
+                                                                     CoordinateDiff{0, 0},
+                                                                     CoordinateDiff{0, 0},
+                                                                     Strides{1, 1},
+                                                                     requant_scale_r);
+        auto dq_r = std::make_shared<op::Dequantize>(
+            conv_r, output_scale_r, int8_zero, element::f32, AxisSet{});
+        auto r_r = std::make_shared<op::Reshape>(dq_r, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
+        auto b_r = std::make_shared<op::Broadcast>(r_r, Shape{1, 1, 2, 2}, AxisSet{0});
+
+        auto add = b_l + b_r;
+        auto relu = std::make_shared<op::Relu>(add);
+        auto q = std::make_shared<op::Quantize>(
+            relu, output_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto dq = std::make_shared<op::Dequantize>(
+            q, output_scale_r, uint8_zero, element::f32, AxisSet{});
+        return make_shared<Function>(
+            NodeVector{dq},
+            ParameterVector{input_l, weights_l, bias_l, input_r, weights_r, bias_r});
+    };
+
+    auto cpu_f1 = make_function();
+    auto cpu_f2 = make_function();
+
+    test::Uniform<float> rng(2.0f, 2.0f);
+    vector<vector<float>> args;
+    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
+    {
+        vector<float> tensor_val(shape_size(param->get_shape()));
+        rng.initialize(tensor_val);
+        args.push_back(tensor_val);
+    }
+
+    // Disable CPUQuantFusion
+    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
+    auto cpu1_results = execute(cpu_f1, args, "CPU");
+    // Enable CPUQuantFusion
+    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
+    auto cpu2_results = execute(cpu_f2, args, "CPU");
+    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
+
+    auto backend = runtime::Backend::create("CPU");
+    auto fuse = make_function();
+    backend->compile(fuse);
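+    // Six input-side Quantize ops (two inputs, two weights, two biases) remain;
+    // the Quantize after the Relu is expected to be fused away by CPUQuantFusion.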
+    ASSERT_EQ(count_ops_of_type<op::Quantize>(fuse), 6);
+}
 #ifndef NGRAPH_JSON_DISABLE
 // Tests that rely on deserializing json files
 TEST(cpu_fusion, fuse_conv_bias)
......