Commit f4ff1c3b authored by Fenglei's avatar Fenglei Committed by Robert Kimball

fix bug and enable some reshape tests (#565)

* fix bug and enable some tests

* eliminate duplicated code, change some parameter names
parent 59e76787
...@@ -51,29 +51,6 @@ ...@@ -51,29 +51,6 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
// Checks the result of an NVRTC API call; throws std::runtime_error with the
// failing expression text and the NVRTC error string on any non-success status.
// Wrapped in do/while(0) so it behaves as a single statement after `if`.
#define NVRTC_SAFE_CALL(x)                                                                         \
    do                                                                                             \
    {                                                                                              \
        nvrtcResult result = x;                                                                    \
        if (result != NVRTC_SUCCESS)                                                               \
        {                                                                                          \
            /* nvrtcGetErrorString returns const char*; wrap in std::string so   */                \
            /* operator+ concatenates instead of ill-formed pointer arithmetic.  */                \
            throw std::runtime_error("\nerror: " #x " failed with error " +                        \
                                     std::string(nvrtcGetErrorString(result)));                    \
        }                                                                                          \
    } while (0)
// Checks the result of a CUDA driver API call; throws std::runtime_error with
// the failing expression text and the driver's error name on any non-success
// status. Wrapped in do/while(0) so it behaves as a single statement after `if`.
#define CUDA_SAFE_CALL(x)                                                                          \
    do                                                                                             \
    {                                                                                              \
        CUresult result = x;                                                                       \
        if (result != CUDA_SUCCESS)                                                                \
        {                                                                                          \
            const char* msg;                                                                       \
            cuGetErrorName(result, &msg);                                                          \
            /* fix: original was missing the closing ')' of runtime_error(...) */                  \
            throw std::runtime_error("\nerror: " #x " failed with error " + std::string(msg));     \
        }                                                                                          \
    } while (0)
void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer, void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer,
const ngraph::Node* n, const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args, const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
...@@ -523,7 +500,6 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -523,7 +500,6 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
{ {
result_shape_product *= i; result_shape_product *= i;
} }
// If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor, // If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor,
// we can just copy. // we can just copy.
if (same_layout || result_shape_product < 2) if (same_layout || result_shape_product < 2)
...@@ -531,7 +507,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -531,7 +507,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 1\n"; writer << "{ // " << n->get_name() << " 1\n";
writer.indent++; writer.indent++;
writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name() writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n"; << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
...@@ -541,8 +517,9 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -541,8 +517,9 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
// TODO Assert arg0_shape[0] == arg1_shape[0]? // TODO Assert arg0_shape[0] == arg1_shape[0]?
writer << "{ // " << n->get_name() << "\n"; writer << "{ // " << n->get_name() << "\n";
writer.indent++; writer.indent++;
writer << "static const float alpha = 1.0;\n"; writer << "const float alpha = 1.0;\n";
writer << "static const float beta = 0.0;\n"; writer << "const float beta = 0;\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
writer << "cublasSgeam(" writer << "cublasSgeam("
<< "cublas_handle," << "cublas_handle,"
<< "CUBLAS_OP_T," << "CUBLAS_OP_T,"
...@@ -551,7 +528,8 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -551,7 +528,8 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
<< args[0].get_name() << "," << arg_shape[1] << "," << args[0].get_name() << "," << arg_shape[1] << ","
<< "&beta," // beta << "&beta," // beta
<< args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name() << "," << args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name() << ","
<< out[0].get_shape()[1] << ");\n"; << result_shape[1] << ");\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
......
...@@ -54,18 +54,17 @@ void* runtime::gpu::create_gpu_buffer(size_t buffer_size) ...@@ -54,18 +54,17 @@ void* runtime::gpu::create_gpu_buffer(size_t buffer_size)
return allocated_buffer_pool; return allocated_buffer_pool;
} }
void runtime::gpu::cuda_memcpyDtD(void* d, void* s, size_t element_count, size_t element_size) void runtime::gpu::cuda_memcpyDtD(void* dst, void* src, size_t buffer_size)
{ {
size_t size_in_bytes = element_size * element_count; cudaMemcpy(dst, src, buffer_size, cudaMemcpyDeviceToDevice);
cudaMemcpy(d, s, size_in_bytes, cudaMemcpyDeviceToDevice);
} }
void runtime::gpu::cuda_memcpyHtD(void* d, void* s, size_t buffer_size) void runtime::gpu::cuda_memcpyHtD(void* dst, void* src, size_t buffer_size)
{ {
cudaMemcpy(d, s, buffer_size, cudaMemcpyHostToDevice); cudaMemcpy(dst, src, buffer_size, cudaMemcpyHostToDevice);
} }
void runtime::gpu::cuda_memset(void* d, int value, size_t buffer_size) void runtime::gpu::cuda_memset(void* dst, int value, size_t buffer_size)
{ {
cudaMemset(d, value, buffer_size); cudaMemset(dst, value, buffer_size);
} }
...@@ -61,9 +61,9 @@ namespace ngraph ...@@ -61,9 +61,9 @@ namespace ngraph
void print_gpu_f32_tensor(void* p, size_t element_count, size_t element_size); void print_gpu_f32_tensor(void* p, size_t element_count, size_t element_size);
void check_cuda_errors(CUresult err); void check_cuda_errors(CUresult err);
void* create_gpu_buffer(size_t buffer_size); void* create_gpu_buffer(size_t buffer_size);
void cuda_memcpyDtD(void* d, void* s, size_t element_count, size_t element_size); void cuda_memcpyDtD(void* dst, void* src, size_t buffer_size);
void cuda_memcpyHtD(void* d, void* s, size_t buffer_size); void cuda_memcpyHtD(void* dst, void* src, size_t buffer_size);
void cuda_memset(void* d, int value, size_t buffer_size); void cuda_memset(void* dst, int value, size_t buffer_size);
} }
} }
} }
...@@ -2373,7 +2373,6 @@ TEST(${BACKEND_NAME}, reshape_s2t) ...@@ -2373,7 +2373,6 @@ TEST(${BACKEND_NAME}, reshape_s2t)
TEST(${BACKEND_NAME}, reshape_v2m_col) TEST(${BACKEND_NAME}, reshape_v2m_col)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3}; Shape shape_a{3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 1}; Shape shape_r{3, 1};
...@@ -2396,7 +2395,6 @@ TEST(${BACKEND_NAME}, reshape_v2m_col) ...@@ -2396,7 +2395,6 @@ TEST(${BACKEND_NAME}, reshape_v2m_col)
TEST(${BACKEND_NAME}, reshape_v2m_row) TEST(${BACKEND_NAME}, reshape_v2m_row)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3}; Shape shape_a{3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{1, 3}; Shape shape_r{1, 3};
...@@ -2442,7 +2440,6 @@ TEST(${BACKEND_NAME}, reshape_v2t_middle) ...@@ -2442,7 +2440,6 @@ TEST(${BACKEND_NAME}, reshape_v2t_middle)
TEST(${BACKEND_NAME}, reshape_m2m_same) TEST(${BACKEND_NAME}, reshape_m2m_same)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 3}; Shape shape_a{3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 3}; Shape shape_r{3, 3};
...@@ -2465,7 +2462,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_same) ...@@ -2465,7 +2462,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_same)
TEST(${BACKEND_NAME}, reshape_m2m_transpose) TEST(${BACKEND_NAME}, reshape_m2m_transpose)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 3}; Shape shape_a{3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 3}; Shape shape_r{3, 3};
...@@ -2488,7 +2484,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_transpose) ...@@ -2488,7 +2484,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_transpose)
TEST(${BACKEND_NAME}, reshape_m2m_dim_change_transpose) TEST(${BACKEND_NAME}, reshape_m2m_dim_change_transpose)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 2}; Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{2, 3}; Shape shape_r{2, 3};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment