Commit 2f8b19a8 authored by Jayaram Bobba, committed by Robert Kimball

Fix to matmul bias column broadcast and modified unit tests (#627)

parent 8520e846
@@ -231,10 +231,6 @@ namespace ngraph
 }
 #endif
-//TODO: This could be further optimized to reduce the impact of memcpy by either
-//a) emitting customized code for initializing output/bias
-//b) emitting two cblas calls (one for gemm on W and x and the second for gemm on Bias and E^T + the result of the first gemm)
-//@jbobba suggests b) is more efficient but we should benchmark both
 template <>
 void CPU_Emitter::EMITTER_DECL(ngraph::op::MatmulBias)
 {
@@ -323,13 +319,10 @@ namespace ngraph
 writer << "};\n";
 writer << "cblas::cblas_sgemm("
-       << "cblas::Layout::RowMajor, " << cnotranspose << ctranspose
-       << arg2_shape[0] << ", " << arg2_shape[1] << ", 1"
-       << ",\n"
-       << " 1.0f, ones_col," << max(1UL, arg2_shape[1]) << ", "
-       << args[2].get_name() << ", "
-       << "1"
-       << ", "
+       << "cblas::Layout::RowMajor, " << cnotranspose << cnotranspose
+       << arg2_shape[0] << ", " << arg2_shape[1] << ", 1,\n"
+       << "1.0f, " << args[2].get_name() << ", 1, "
+       << "ones_col, " << max(1UL, arg2_shape[1]) << ", "
        << "1.0f"
        << ",\n"
        << " " << out[0].get_name() << ", "
...
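For context, the second cblas_sgemm emitted here performs the bias broadcast as a rank-1 update: with K = 1 and beta = 1.0f it computes C = bias * ones + C, adding bias[i] to every element of row i of the output. The fix swaps the operand order so that the bias is the M x 1 left operand (lda = 1) and the ones vector the 1 x N right operand, and drops the transpose flag on the second operand. Below is a minimal standalone sketch of the equivalent call, not code from this commit; it assumes a plain CBLAS header, and the starting values {9, 27, 36, 108} are the bare product inferred from the test expectations further down.

```cpp
// A minimal sketch of the rank-1 bias broadcast, written directly
// against CBLAS. Values are inferred from the unit tests below.
#include <cblas.h>
#include <cstdio>

int main()
{
    const int M = 2, N = 2;
    float C[]    = {9.0f, 27.0f, 36.0f, 108.0f}; // result of the first gemm (inferred)
    float bias[] = {2.0f, 3.0f};                 // Shape{2}: one value per output row
    float ones[] = {1.0f, 1.0f};                 // the emitted "ones_col" vector

    // C = 1.0f * (M x 1 bias) * (1 x N ones) + 1.0f * C: a rank-1 update
    // that adds bias[i] to every element of row i (column broadcast).
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                M, N, /*K=*/1,
                1.0f, bias, /*lda=*/1,
                ones, /*ldb=*/N,
                1.0f, C, /*ldc=*/N);

    for (int i = 0; i < M; i++)
    {
        std::printf("%g %g\n", C[i * N + 0], C[i * N + 1]); // 11 29, then 39 111
    }
    return 0;
}
```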
@@ -109,14 +109,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
 auto A = make_shared<op::Parameter>(element::f32, shapeA);
 auto B = make_shared<op::Parameter>(element::f32, shapeB);
-auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
-auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0});
+auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});
 auto cg = make_shared<op::MatmulBias>(
-    A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
+    A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
 auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
@@ -136,8 +132,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
 copy_data(b, dataB);
 cf->call({a, b}, {result});
-vector<float> expected{10, 28, 37, 109};
-ASSERT_TRUE(read_vector<float>(result) == expected);
+vector<float> expected{11, 30, 38, 111};
+EXPECT_EQ(read_vector<float>(result), expected);
 }
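Why the expected values change: with the old all-ones bias, a row broadcast and a column broadcast are indistinguishable (each just adds 1 to every element), which is presumably how the column-broadcast bug went unnoticed. The new bias {2, 3} breaks that symmetry. Subtracting the old bias from the old expectations gives a bare product of {9, 27, 36, 108}, and with AxisSet{0} (broadcast along rows) the bias is added per column: {9+2, 27+3, 36+2, 108+3} = {11, 30, 38, 111}.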
TEST(cpu_fusion, gemm_cpu_broadcast_column)
@@ -148,14 +144,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
 auto A = make_shared<op::Parameter>(element::f32, shapeA);
 auto B = make_shared<op::Parameter>(element::f32, shapeB);
-auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
-auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{1});
+auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});
 auto cg = make_shared<op::MatmulBias>(
-    A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
+    A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
 auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
@@ -175,8 +167,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
 copy_data(b, dataB);
 cf->call({a, b}, {result});
-vector<float> expected{10, 28, 37, 109};
-ASSERT_TRUE(read_vector<float>(result) == expected);
+vector<float> expected{11, 29, 39, 111};
+EXPECT_EQ(read_vector<float>(result), expected);
 }
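With AxisSet{1} (broadcast along columns) the same bias is instead added per row: {9+2, 27+2, 36+3, 108+3} = {11, 29, 39, 111}. Before the emitter fix, the reversed sgemm operands appear to have produced the per-column result in both cases, which the all-ones bias could not detect; the distinct expectations in the two tests are what now pin down the correct axis.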
TEST(cpu_fusion, gemm_cpu_broadcast_matrix)
...