clean up code

400f7f2c · fenglei.tian · 4eaf5711 · 400f7f2c · 400f7f2c
Commit 400f7f2c authored Feb 12, 2018 by fenglei.tian
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 56 deletions

gpu_cuda_kernel_emitters.cpp src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp +9 -27

gpu_emitter.cpp src/ngraph/runtime/gpu/gpu_emitter.cpp +0 -29

No files found.
--- a/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
@@ -91,9 +91,9 @@ namespace ngraph
                            // Create an instance of nvrtcProgram with the code string. 

                        nvrtcProgram prog; 
-    NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, // prog i
-                op_abs, // buffer 
-                "op_abs.cu", // name 
+                        NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
+                                    op_abs, 
+                                    "op_abs.cu", 
                                    0, // numHeaders 
                                    NULL, // headers 
                                    NULL)); // includeNames
@@ -101,10 +101,9 @@ namespace ngraph

                        const char *opts[] = {"--gpu-architecture=compute_35",
                            "--relocatable-device-code=true"};
-    nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog 
-            2, // numOptions 
-            opts); // options
-    // Obtain compilation log from the program. 
+                        nvrtcResult compileResult = nvrtcCompileProgram(prog, 
+                                2, 
+                                opts); 

                        size_t logSize; 

@@ -134,32 +133,15 @@ namespace ngraph
                        CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
                        CUDA_SAFE_CALL(cuModuleGetFunction(&cuda_op_abs_kernel, module, "cuda_op_abs"));

-    size_t numBlocks = 4;
-    size_t numThreads = 4; 
-    size_t nt = numBlocks * numThreads; 
-    size_t bufferSize = nt * sizeof(float); 
-    float *hOut = new float[nt]; 
-    float *hIn = new float[nt]; 
-    for(int i = 0; i< nt; i++) hIn[i] = -i;
-    
-//    void *dOut, *dIn;
-//    cudaMalloc((void**) &dIn, 64);
-//    cudaMalloc((void**) &dOut, 64);   
                        CUdeviceptr dPtrIn, dPtrOut;
                        dPtrIn = (CUdeviceptr)in;
                        dPtrOut = (CUdeviceptr)out;

-                        void *argsList[] = {&dPtrIn, &dPtrOut, &nt};
-  //  cudaLaunchKernel(cuda_op_obs_kernel,
-  //                   {4, 1, 1},
-  //                   {1, 1, 1},
-  //                    argslist, 0, NULL);
- 
-                 //       void *argsList[] = {dIn, dOut, &nt};
+                        void *argsList[] = {&dPtrIn, &dPtrOut, &count};
                        CUDA_SAFE_CALL(
                                cuLaunchKernel(cuda_op_abs_kernel, 
-                        4 , 1, 1, // grid dim 
-                        4, 1, 1, // block dim 
+                                    count ,1, 1, // grid dim 
+                                    1, 1, 1, // block dim 
                                    0, NULL, // shared mem and stream 
                                    argsList, 0)); // arguments 
                        CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. 

--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
@@ -93,35 +93,6 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
    writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() <<  ", (void*) " << out[0].get_name() << ", count);\n";
    writer.indent--;
    writer << "}\n";
-
-//   ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) , (void*) ((float*)(outputs[0])), count);
-    //Generate input for execution, and create output buffers. 
-//    size_t nt = 4; //numBlocks * numThreads; 
-//    size_t bufferSize = nt * sizeof(float); 
-//    float *hOut = new float[nt]; 
-//    float *hIn = new float[nt]; 
-//    for(int i = 0; i< nt; i++) hIn[i] = -i;
-//    
-//    CUdeviceptr dOut, dIn;
-//    cuMemAlloc(&dOut, bufferSize); // Execute parent kernel. 
-//    cuMemAlloc(&dIn, bufferSize); // Execute parent kernel. 
-//    cuMemcpyHtoD(dIn, hIn, bufferSize); 
-//    
-//    ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) dIn , (void*) dOut, nt);
-
-
-    //void *argst[] = {&dIn, &dOut, &nt};
-    // CUDA_SAFE_CALL(
-    // cuLaunchKernel(kernel, 
-    //    numBlocks , 1, 1, // grid dim 
-    //    numThreads, 1, 1, // block dim 
-    //    0, NULL, // shared mem and stream 
-    //   argst, 0)); // arguments 
-    //CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. 
-    //cuMemcpyDtoH(hOut, dOut, bufferSize); 
-    //for (size_t i = 0; i < nt; ++i) { std::cout << hOut[i] << '\n'; } // Release resources. 
-    //cuMemFree(dOut); 
-    //cuModuleUnload(module); 
 }

 void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,