Commit 60523801 authored by Fenglei, committed by Robert Kimball

add gpu product (#1040)

* add gpu product

* enable test, change initial value for product
parent 2177cf5b
......@@ -1316,6 +1316,50 @@ CUDNN_SAFE_CALL(cudnnSetOpTensorDescriptor(opTensorDesc,
return;
}
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Product)
{
const ngraph::op::Product* product = static_cast<const ngraph::op::Product*>(node);
writer.block_begin(" // " + node->get_name());
{
if (out[0].get_size() != 0)
{
// args[0] has a zero-size axis, so the input is empty; fill the output with the product identity, 1
if (args[0].get_size() == 0)
{
writer << "float init_value = 1;\n";
writer << "std::vector<float> temp(" << out[0].get_size()
<< ", init_value);\n";
writer << "runtime::gpu::cuda_memcpyHtD(" << out[0].get_name()
<< ", (void*)temp.data(), " << out[0].get_size() << " * "
<< out[0].get_element_type().size() << ");\n";
}
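// input and output have the same rank, so no axes are reduced: a device-to-device copy of the input is the product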
else if (args[0].get_shape().size() == out[0].get_shape().size())
{
kernel::emit_memcpyDtD(writer, out[0], args[0]);
}
// otherwise emit a cuDNN reduction (descriptors for tensors with <= 4 dimensions)
else
{
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index =
cudnn_emitter->build_reduce_forward(external_function->ctx().get(),
CUDNN_REDUCE_TENSOR_MUL,
args[0].get_shape(),
product->get_reduction_axes());
writer << "gpu::invoke_primitive(ctx, " << index << ", ";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
}
}
}
writer.block_end();
return;
}
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Reduce)
{
......
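For orientation, here is a rough, self-contained sketch of the host code that the zero-size-input branch above emits through its writer << strings, assuming a float element type as those strings do; cuda_memcpyHtD is stubbed so the sketch compiles on its own, standing in for runtime::gpu::cuda_memcpyHtD.

#include <cstddef>
#include <cstring>
#include <vector>

// Stand-in stub for runtime::gpu::cuda_memcpyHtD (host-to-device copy), reduced
// to a plain memcpy so this sketch is buildable outside the GPU runtime.
static void cuda_memcpyHtD(void* dst, void* src, std::size_t nbytes)
{
    std::memcpy(dst, src, nbytes);
}

// Product over an empty input is the identity value 1, so the emitted code fills
// a host-side staging buffer with 1 and copies it into the output tensor.
void product_of_empty_input(void* out, std::size_t out_size, std::size_t element_size)
{
    float init_value = 1;
    std::vector<float> temp(out_size, init_value);
    cuda_memcpyHtD(out, (void*)temp.data(), out_size * element_size);
}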
......@@ -83,20 +83,6 @@ one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob
one_hot_vector_1_fp_nonint
parameter_as_output
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
product_3d_to_matrix_most_sig
product_3d_to_scalar
product_3d_to_vector
product_matrix_cols_zero
product_matrix_columns
product_matrix_rows
product_matrix_rows_zero
product_matrix_to_scalar_zero_by_zero
product_to_scalar
product_trivial
product_trivial_5d
product_vector_zero
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
......
......@@ -6805,10 +6805,11 @@ NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_scalar)
auto result = backend->create_tensor(element::f32, shape_rt);
backend->call(f, {result}, {a});
EXPECT_EQ((vector<float>{1.0f * 10.0f * 9.0f * 4.0f * 13.0f * 6.0f * 7.0f * 12.0f * 3.0f *
2.0f * 11.0f * 8.0f * 5.0f * 14.0f * 5.0f * 8.0f * 11.0f * 2.0f *
3.0f * 12.0f * 7.0f * 6.0f * 13.0f * 4.0f * 9.0f * 10.0f * 1.0f}),
read_vector<float>(result));
EXPECT_TRUE(test::all_close(vector<float>{1.0f * 10.0f * 9.0f * 4.0f * 13.0f * 6.0f * 7.0f *
12.0f * 3.0f * 2.0f * 11.0f * 8.0f * 5.0f * 14.0f *
5.0f * 8.0f * 11.0f * 2.0f * 3.0f * 12.0f * 7.0f *
6.0f * 13.0f * 4.0f * 9.0f * 10.0f * 1.0f},
read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, product_3d_eliminate_zero_dim)
......
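A note on the assertion change above: the GPU reduction may accumulate partial products in a different order than the serial reference, and float multiplication is not associative under rounding, so exact equality (EXPECT_EQ) is too strict and a tolerance check (test::all_close) is used instead. The stand-alone example below, not part of the commit, multiplies a few of the factors from the test in two different orders to illustrate why bit-identical results cannot be assumed.

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // a few of the factors used in product_3d_to_scalar above
    std::vector<float> v{1.0f, 10.0f, 9.0f, 4.0f, 13.0f, 6.0f, 7.0f,
                         12.0f, 3.0f, 2.0f, 11.0f, 8.0f, 5.0f, 14.0f};
    float forward = 1.0f;
    float backward = 1.0f;
    for (std::size_t i = 0; i < v.size(); ++i)
        forward *= v[i];
    for (std::size_t i = v.size(); i-- > 0;)
        backward *= v[i];
    // Each intermediate product is rounded to float precision, so different
    // accumulation orders need not produce bit-identical results; comparing
    // within a tolerance is the robust check.
    std::printf("forward=%g backward=%g diff=%g\n", forward, backward, forward - backward);
    return 0;
}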