Commit 400f7f2c authored by fenglei.tian

clean up code

parent 4eaf5711
...
@@ -91,9 +91,9 @@ namespace ngraph
     // Create an instance of nvrtcProgram with the code string.
     nvrtcProgram prog;
-    NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,       // prog
-                                       op_abs,      // buffer
-                                       "op_abs.cu", // name
+    NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
+                                       op_abs,
+                                       "op_abs.cu",
                                        0,           // numHeaders
                                        NULL,        // headers
                                        NULL));      // includeNames
...
@@ -101,10 +101,9 @@ namespace ngraph
     const char *opts[] = {"--gpu-architecture=compute_35",
                           "--relocatable-device-code=true"};
-    nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
-                                                    2,     // numOptions
-                                                    opts); // options
-    // Obtain compilation log from the program.
+    nvrtcResult compileResult = nvrtcCompileProgram(prog,
+                                                    2,
+                                                    opts);
     size_t logSize;
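
Between this hunk and the next, the file pulls the compile log and the generated PTX out of the program object; those lines are untouched by this commit and not shown here. For orientation, a minimal sketch of that step using the standard NVRTC API (buffer names are illustrative, not the file's exact code):

    // Sketch only: fetch the compile log (logSize is the variable declared above).
    NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
    std::vector<char> log(logSize);
    NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log.data()));

    // Fetch the PTX that cuModuleLoadDataEx consumes in the next hunk (there named ptx),
    // then release the program object. Requires <nvrtc.h> and <vector>.
    size_t ptxSize;
    NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
    std::vector<char> ptxBuffer(ptxSize);
    NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptxBuffer.data()));
    NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog));
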
...
@@ -134,32 +133,15 @@ namespace ngraph
     CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
     CUDA_SAFE_CALL(cuModuleGetFunction(&cuda_op_abs_kernel, module, "cuda_op_abs"));
-    size_t numBlocks = 4;
-    size_t numThreads = 4;
-    size_t nt = numBlocks * numThreads;
-    size_t bufferSize = nt * sizeof(float);
-    float *hOut = new float[nt];
-    float *hIn = new float[nt];
-    for(int i = 0; i < nt; i++) hIn[i] = -i;
-    // void *dOut, *dIn;
-    // cudaMalloc((void**) &dIn, 64);
-    // cudaMalloc((void**) &dOut, 64);
     CUdeviceptr dPtrIn, dPtrOut;
     dPtrIn = (CUdeviceptr)in;
     dPtrOut = (CUdeviceptr)out;
-    void *argsList[] = {&dPtrIn, &dPtrOut, &nt};
-    // cudaLaunchKernel(cuda_op_obs_kernel,
-    //                  {4, 1, 1},
-    //                  {1, 1, 1},
-    //                  argslist, 0, NULL);
-    // void *argsList[] = {dIn, dOut, &nt};
+    void *argsList[] = {&dPtrIn, &dPtrOut, &count};
     CUDA_SAFE_CALL(
         cuLaunchKernel(cuda_op_abs_kernel,
-                       4, 1, 1,       // grid dim
-                       4, 1, 1,       // block dim
+                       count, 1, 1,   // grid dim
+                       1, 1, 1,       // block dim
                        0, NULL,       // shared mem and stream
                        argsList, 0)); // arguments
     CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
...
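
The launch change in the hunk above assigns one block of a single thread to each tensor element: count blocks in x, 1 thread per block, replacing the earlier fixed 4x4 test launch, with the element count now passed through argsList instead of the hard-coded nt. A hedged sketch of the kind of kernel such a configuration pairs with; the real cuda_op_abs source is the op_abs string compiled above and may differ:

    extern "C" __global__ void cuda_op_abs(float* in, float* out, size_t n)
    {
        // With blockDim.x == 1 this index reduces to blockIdx.x, i.e. one element per block.
        size_t i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
        {
            out[i] = fabsf(in[i]);
        }
    }

One thread per block keeps the indexing trivial but leaves most of each warp idle; a more conventional configuration is a fixed block size (for example 64 threads) with (count + 63) / 64 blocks in the grid.
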
...
@@ -93,35 +93,6 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
     writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() << ", (void*) " << out[0].get_name() << ", count);\n";
     writer.indent--;
     writer << "}\n";
-    // ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) , (void*) ((float*)(outputs[0])), count);
-    // Generate input for execution, and create output buffers.
-    // size_t nt = 4; // numBlocks * numThreads;
-    // size_t bufferSize = nt * sizeof(float);
-    // float *hOut = new float[nt];
-    // float *hIn = new float[nt];
-    // for(int i = 0; i < nt; i++) hIn[i] = -i;
-    //
-    // CUdeviceptr dOut, dIn;
-    // cuMemAlloc(&dOut, bufferSize); // Execute parent kernel.
-    // cuMemAlloc(&dIn, bufferSize);  // Execute parent kernel.
-    // cuMemcpyHtoD(dIn, hIn, bufferSize);
-    //
-    // ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) dIn, (void*) dOut, nt);
-    // void *argst[] = {&dIn, &dOut, &nt};
-    // CUDA_SAFE_CALL(
-    //     cuLaunchKernel(kernel,
-    //                    numBlocks, 1, 1,  // grid dim
-    //                    numThreads, 1, 1, // block dim
-    //                    0, NULL,          // shared mem and stream
-    //                    argst, 0));       // arguments
-    // CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
-    // cuMemcpyDtoH(hOut, dOut, bufferSize);
-    // for (size_t i = 0; i < nt; ++i) { std::cout << hOut[i] << '\n'; } // Release resources.
-    // cuMemFree(dOut);
-    // cuModuleUnload(module);
 }
 
 void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
...
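
For a concrete picture of what EmitAbs writes into the generated source: if args[0].get_name() and out[0].get_name() return, say, arg0 and out0 (hypothetical names), the emitted block reads roughly as below; the enclosing braces come from writer statements outside this hunk, and this is an illustration rather than captured codegen output:

    {
        ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) arg0, (void*) out0, count);
    }
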