Commit 400f7f2c authored by fenglei.tian

clean up code

parent 4eaf5711
......@@ -91,9 +91,9 @@ namespace ngraph
// Create an instance of nvrtcProgram with the code string.
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, // prog i
op_abs, // buffer
"op_abs.cu", // name
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
op_abs,
"op_abs.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
......@@ -101,10 +101,9 @@ namespace ngraph
const char *opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog
2, // numOptions
opts); // options
// Obtain compilation log from the program.
nvrtcResult compileResult = nvrtcCompileProgram(prog,
2,
opts);
size_t logSize;
......@@ -134,32 +133,15 @@ namespace ngraph
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&cuda_op_abs_kernel, module, "cuda_op_abs"));
size_t numBlocks = 4;
size_t numThreads = 4;
size_t nt = numBlocks * numThreads;
size_t bufferSize = nt * sizeof(float);
float *hOut = new float[nt];
float *hIn = new float[nt];
for(int i = 0; i< nt; i++) hIn[i] = -i;
// void *dOut, *dIn;
// cudaMalloc((void**) &dIn, 64);
// cudaMalloc((void**) &dOut, 64);
CUdeviceptr dPtrIn, dPtrOut;
dPtrIn = (CUdeviceptr)in;
dPtrOut = (CUdeviceptr)out;
void *argsList[] = {&dPtrIn, &dPtrOut, &nt};
// cudaLaunchKernel(cuda_op_obs_kernel,
// {4, 1, 1},
// {1, 1, 1},
// argslist, 0, NULL);
// void *argsList[] = {dIn, dOut, &nt};
void *argsList[] = {&dPtrIn, &dPtrOut, &count};
CUDA_SAFE_CALL(
cuLaunchKernel(cuda_op_abs_kernel,
4 , 1, 1, // grid dim
4, 1, 1, // block dim
count ,1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
argsList, 0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
......
......@@ -93,35 +93,6 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() << ", (void*) " << out[0].get_name() << ", count);\n";
writer.indent--;
writer << "}\n";
// ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) , (void*) ((float*)(outputs[0])), count);
//Generate input for execution, and create output buffers.
// size_t nt = 4; //numBlocks * numThreads;
// size_t bufferSize = nt * sizeof(float);
// float *hOut = new float[nt];
// float *hIn = new float[nt];
// for(int i = 0; i< nt; i++) hIn[i] = -i;
//
// CUdeviceptr dOut, dIn;
// cuMemAlloc(&dOut, bufferSize); // Execute parent kernel.
// cuMemAlloc(&dIn, bufferSize); // Execute parent kernel.
// cuMemcpyHtoD(dIn, hIn, bufferSize);
//
// ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) dIn , (void*) dOut, nt);
//void *argst[] = {&dIn, &dOut, &nt};
// CUDA_SAFE_CALL(
// cuLaunchKernel(kernel,
// numBlocks , 1, 1, // grid dim
// numThreads, 1, 1, // block dim
// 0, NULL, // shared mem and stream
// argst, 0)); // arguments
//CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
//cuMemcpyDtoH(hOut, dOut, bufferSize);
//for (size_t i = 0; i < nt; ++i) { std::cout << hOut[i] << '\n'; } // Release resources.
//cuMemFree(dOut);
//cuModuleUnload(module);
}
void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment