Fix matmul bias column broadcast and update unit tests (#627)

2f8b19a8 · Jayaram Bobba · Robert Kimball · 8520e846 · 2f8b19a8 · 2f8b19a8
Commit 2f8b19a8 authored Mar 11, 2018 by Jayaram Bobba; committed by Robert Kimball Mar 11, 2018
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 27 deletions

cpu_emitter.cpp src/ngraph/runtime/cpu/cpu_emitter.cpp +4 -11

cpu_fusion.cpp test/cpu_fusion.cpp +8 -16

No files found.
--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -231,10 +231,6 @@ namespace ngraph
            }
 #endif

-            //TODO: This could be further optimized to reduce the impact of memcpy by either
-            //a) emitting customized code for initializing output/bias
-            //b) emitting two cblas calls (one for gemm on W and x and the second for gemm on Bias and E^T + the result of the first gemm)
-            //@jbobba suggests b) is more efficient but we should benchmark both
            template <>
            void CPU_Emitter::EMITTER_DECL(ngraph::op::MatmulBias)
            {
@@ -323,13 +319,10 @@ namespace ngraph
                            writer << "};\n";

                            writer << "cblas::cblas_sgemm("
-                                   << "cblas::Layout::RowMajor, " << cnotranspose << ctranspose
-                                   << arg2_shape[0] << ", " << arg2_shape[1] << ", 1"
-                                   << ",\n"
-                                   << "        1.0f, ones_col," << max(1UL, arg2_shape[1]) << ", "
-                                   << args[2].get_name() << ", "
-                                   << "1"
-                                   << ", "
+                                   << "cblas::Layout::RowMajor, " << cnotranspose << cnotranspose
+                                   << arg2_shape[0] << ", " << arg2_shape[1] << ", 1,\n"
+                                   << "1.0f, " << args[2].get_name() << ", 1, "
+                                   << "ones_col, " << max(1UL, arg2_shape[1]) << ", "
                                   << "1.0f"
                                   << ",\n"
                                   << "        " << out[0].get_name() << ", "

--- a/test/cpu_fusion.cpp
+++ b/test/cpu_fusion.cpp
@@ -109,14 +109,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
    auto A = make_shared<op::Parameter>(element::f32, shapeA);
    auto B = make_shared<op::Parameter>(element::f32, shapeB);

-    auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-    auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-
-    auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
+    auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});

-    auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0});
    auto cg = make_shared<op::MatmulBias>(
-        A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
+        A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{0});

    auto f = make_shared<Function>(cg, op::ParameterVector{A, B});

@@ -136,8 +132,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
    copy_data(b, dataB);

    cf->call({a, b}, {result});
-    vector<float> expected{10, 28, 37, 109};
-    ASSERT_TRUE(read_vector<float>(result) == expected);
+    vector<float> expected{11, 30, 38, 111};
+    EXPECT_EQ(read_vector<float>(result), expected);
 }

 TEST(cpu_fusion, gemm_cpu_broadcast_column)
@@ -148,14 +144,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
    auto A = make_shared<op::Parameter>(element::f32, shapeA);
    auto B = make_shared<op::Parameter>(element::f32, shapeB);

-    auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-    auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-
-    auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
+    auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});

-    auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{1});
    auto cg = make_shared<op::MatmulBias>(
-        A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
+        A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{1});

    auto f = make_shared<Function>(cg, op::ParameterVector{A, B});

@@ -175,8 +167,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
    copy_data(b, dataB);

    cf->call({a, b}, {result});
-    vector<float> expected{10, 28, 37, 109};
-    ASSERT_TRUE(read_vector<float>(result) == expected);
+    vector<float> expected{11, 29, 39, 111};
+    EXPECT_EQ(read_vector<float>(result), expected);
 }

 TEST(cpu_fusion, gemm_cpu_broadcast_matrix)