Commit f4ff1c3b authored by Fenglei's avatar Fenglei Committed by Robert Kimball

fix bug and enable some reshape tests (#565)

* fix bug and enable some tests

* eliminate duplicated code, change some parameter names
parent 59e76787
...@@ -51,29 +51,6 @@ ...@@ -51,29 +51,6 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
// Checks the result of an NVRTC API call; throws std::runtime_error with the
// failing expression text and the NVRTC error string on any non-success status.
// Wrapped in do/while(0) so it behaves as a single statement after `if`.
#define NVRTC_SAFE_CALL(x)                                                                         \
    do                                                                                             \
    {                                                                                              \
        nvrtcResult result = x;                                                                    \
        if (result != NVRTC_SUCCESS)                                                               \
        {                                                                                          \
            /* nvrtcGetErrorString returns const char*; wrap in std::string so   */                \
            /* operator+ concatenates instead of ill-formed pointer arithmetic.  */                \
            throw std::runtime_error("\nerror: " #x " failed with error " +                        \
                                     std::string(nvrtcGetErrorString(result)));                    \
        }                                                                                          \
    } while (0)
// Checks the result of a CUDA driver API call; throws std::runtime_error with
// the failing expression text and the driver's error name on any non-success
// status. Wrapped in do/while(0) so it behaves as a single statement after `if`.
#define CUDA_SAFE_CALL(x)                                                                          \
    do                                                                                             \
    {                                                                                              \
        CUresult result = x;                                                                       \
        if (result != CUDA_SUCCESS)                                                                \
        {                                                                                          \
            const char* msg;                                                                       \
            cuGetErrorName(result, &msg);                                                          \
            /* fix: original was missing the closing ')' of runtime_error(...) */                  \
            throw std::runtime_error("\nerror: " #x " failed with error " + std::string(msg));     \
        }                                                                                          \
    } while (0)
void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer, void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer,
const ngraph::Node* n, const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args, const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
...@@ -523,7 +500,6 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -523,7 +500,6 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
{ {
result_shape_product *= i; result_shape_product *= i;
} }
// If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor, // If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor,
// we can just copy. // we can just copy.
if (same_layout || result_shape_product < 2) if (same_layout || result_shape_product < 2)
...@@ -531,7 +507,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -531,7 +507,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 1\n"; writer << "{ // " << n->get_name() << " 1\n";
writer.indent++; writer.indent++;
writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name() writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n"; << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
...@@ -541,8 +517,9 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -541,8 +517,9 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
// TODO Assert arg0_shape[0] == arg1_shape[0]? // TODO Assert arg0_shape[0] == arg1_shape[0]?
writer << "{ // " << n->get_name() << "\n"; writer << "{ // " << n->get_name() << "\n";
writer.indent++; writer.indent++;
writer << "static const float alpha = 1.0;\n"; writer << "const float alpha = 1.0;\n";
writer << "static const float beta = 0.0;\n"; writer << "const float beta = 0;\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
writer << "cublasSgeam(" writer << "cublasSgeam("
<< "cublas_handle," << "cublas_handle,"
<< "CUBLAS_OP_T," << "CUBLAS_OP_T,"
...@@ -551,7 +528,8 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer, ...@@ -551,7 +528,8 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
<< args[0].get_name() << "," << arg_shape[1] << "," << args[0].get_name() << "," << arg_shape[1] << ","
<< "&beta," // beta << "&beta," // beta
<< args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name() << "," << args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name() << ","
<< out[0].get_shape()[1] << ");\n"; << result_shape[1] << ");\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
......
...@@ -54,18 +54,17 @@ void* runtime::gpu::create_gpu_buffer(size_t buffer_size) ...@@ -54,18 +54,17 @@ void* runtime::gpu::create_gpu_buffer(size_t buffer_size)
return allocated_buffer_pool; return allocated_buffer_pool;
} }
void runtime::gpu::cuda_memcpyDtD(void* d, void* s, size_t element_count, size_t element_size) void runtime::gpu::cuda_memcpyDtD(void* dst, void* src, size_t buffer_size)
{ {
size_t size_in_bytes = element_size * element_count; cudaMemcpy(dst, src, buffer_size, cudaMemcpyDeviceToDevice);
cudaMemcpy(d, s, size_in_bytes, cudaMemcpyDeviceToDevice);
} }
void runtime::gpu::cuda_memcpyHtD(void* d, void* s, size_t buffer_size) void runtime::gpu::cuda_memcpyHtD(void* dst, void* src, size_t buffer_size)
{ {
cudaMemcpy(d, s, buffer_size, cudaMemcpyHostToDevice); cudaMemcpy(dst, src, buffer_size, cudaMemcpyHostToDevice);
} }
void runtime::gpu::cuda_memset(void* d, int value, size_t buffer_size) void runtime::gpu::cuda_memset(void* dst, int value, size_t buffer_size)
{ {
cudaMemset(d, value, buffer_size); cudaMemset(dst, value, buffer_size);
} }
...@@ -61,9 +61,9 @@ namespace ngraph ...@@ -61,9 +61,9 @@ namespace ngraph
void print_gpu_f32_tensor(void* p, size_t element_count, size_t element_size); void print_gpu_f32_tensor(void* p, size_t element_count, size_t element_size);
void check_cuda_errors(CUresult err); void check_cuda_errors(CUresult err);
void* create_gpu_buffer(size_t buffer_size); void* create_gpu_buffer(size_t buffer_size);
void cuda_memcpyDtD(void* d, void* s, size_t element_count, size_t element_size); void cuda_memcpyDtD(void* dst, void* src, size_t buffer_size);
void cuda_memcpyHtD(void* d, void* s, size_t buffer_size); void cuda_memcpyHtD(void* dst, void* src, size_t buffer_size);
void cuda_memset(void* d, int value, size_t buffer_size); void cuda_memset(void* dst, int value, size_t buffer_size);
} }
} }
} }
...@@ -2373,7 +2373,6 @@ TEST(${BACKEND_NAME}, reshape_s2t) ...@@ -2373,7 +2373,6 @@ TEST(${BACKEND_NAME}, reshape_s2t)
TEST(${BACKEND_NAME}, reshape_v2m_col) TEST(${BACKEND_NAME}, reshape_v2m_col)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3}; Shape shape_a{3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 1}; Shape shape_r{3, 1};
...@@ -2396,7 +2395,6 @@ TEST(${BACKEND_NAME}, reshape_v2m_col) ...@@ -2396,7 +2395,6 @@ TEST(${BACKEND_NAME}, reshape_v2m_col)
TEST(${BACKEND_NAME}, reshape_v2m_row) TEST(${BACKEND_NAME}, reshape_v2m_row)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3}; Shape shape_a{3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{1, 3}; Shape shape_r{1, 3};
...@@ -2442,7 +2440,6 @@ TEST(${BACKEND_NAME}, reshape_v2t_middle) ...@@ -2442,7 +2440,6 @@ TEST(${BACKEND_NAME}, reshape_v2t_middle)
TEST(${BACKEND_NAME}, reshape_m2m_same) TEST(${BACKEND_NAME}, reshape_m2m_same)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 3}; Shape shape_a{3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 3}; Shape shape_r{3, 3};
...@@ -2465,7 +2462,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_same) ...@@ -2465,7 +2462,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_same)
TEST(${BACKEND_NAME}, reshape_m2m_transpose) TEST(${BACKEND_NAME}, reshape_m2m_transpose)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 3}; Shape shape_a{3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{3, 3}; Shape shape_r{3, 3};
...@@ -2488,7 +2484,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_transpose) ...@@ -2488,7 +2484,6 @@ TEST(${BACKEND_NAME}, reshape_m2m_transpose)
TEST(${BACKEND_NAME}, reshape_m2m_dim_change_transpose) TEST(${BACKEND_NAME}, reshape_m2m_dim_change_transpose)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape_a{3, 2}; Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{2, 3}; Shape shape_r{2, 3};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment