cuda kernel tests

89a56b08 · Fenglei Tian · b6c34bd3 · 89a56b08 · 89a56b08 · 89a56b08
Commit 89a56b08 authored Feb 09, 2018 by Fenglei Tian
Showing with 10 additions and 67 deletions

CMakeLists.txt src/ngraph/CMakeLists.txt +1 -0

gpu_cuda_kernel_emitters.cpp src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp +8 -67

gpu_external_function.cpp src/ngraph/runtime/gpu/gpu_external_function.cpp +1 -0

No files found.
--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -203,6 +203,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
            runtime/gpu/gpu_tensor_view.cpp
            runtime/gpu/gpu_tensor_view_wrapper.cpp
            runtime/gpu/gpu_util.cpp
+            runtime/gpu/gpu_cuda_kernel_emitter.cpp
            )
        set_property(SOURCE codegen/compiler.cpp APPEND_STRING PROPERTY COMPILE_DEFINITIONS
        "CUDA_HEADER_PATHS=\"${CUDA_INCLUDE_DIRS}\";")

--- a/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
@@ -77,73 +77,14 @@ namespace ngraph

                    void emit_abs(void* in, void* out, size_t count)
                    {
-   const char *op_abs = R"(  
-  extern "C" __global__  
-  void cuda_op_abs(float* in, float* out, size_t n)  
-  {  
-    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;  
-    if(tid < n) 
-    {
-      out[tid] = fabsf(in[tid]);  
-    }
-  })";
-
-        size_t numBlocks = 4; size_t numThreads = 4; 
-    // Create an instance of nvrtcProgram with the code string. 
-
-    nvrtcProgram prog; 
-    NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, // prog i
-                op_abs, // buffer 
-                "op_abs.cu", // name 
-                0, // numHeaders 
-                NULL, // headers 
-                NULL)); // includeNames
-
-
-    const char *opts[] = {"--gpu-architecture=compute_35",
-        "--relocatable-device-code=true"};
-    nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog 
-            2, // numOptions 
-            opts); // options
-    // Obtain compilation log from the program. 
-
-    size_t logSize; 
-
-    NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize)); 
-    char *log = new char[logSize]; 
-    NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log)); 
-    std::cout << log << '\n'; 
-    delete[] log; 
-
-    if (compileResult != NVRTC_SUCCESS) {
-        exit(1);
-    }
-
-    size_t ptxSize; 
-    NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize)); 
-    char *ptx = new char[ptxSize]; 
-    NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx)); // Destroy the program. 
-    NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Load the generated PTX and get a handle to the parent kernel. 
-
-    CUdevice cuDevice; 
-    CUcontext context; 
-    CUmodule module; 
-    CUfunction cuda_op_abs_kernel; 
-    CUDA_SAFE_CALL( cuInit(0)); 
-    CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0)); 
-    CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice)); 
-    CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
-    CUDA_SAFE_CALL(cuModuleGetFunction(&cuda_op_abs_kernel, module, "cuda_op_abs"));
-
-    void *argsList[] = {In, Out, &count};
-    CUDA_SAFE_CALL(
-    cuLaunchKernel(cuda_op_abs_kernel, 
-       count , 1, 1, // grid dim 
-       1, 1, 1, // block dim 
-       0, NULL, // shared mem and stream 
-      argsList, 0)); // arguments 
-    CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. 
-    
+                        void *argsList[] = {In, Out, &count};
+                        CUDA_SAFE_CALL(
+                        cuLaunchKernel(cuda_op_abs_kernel, 
+                        count , 1, 1, // grid dim 
+                        1, 1, 1, // block dim 
+                        0, NULL, // shared mem and stream 
+                        argsList, 0)); // arguments 
+                        CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. 
                    }
                    void emit_broadcast(codegen::CodeWriter& writer,
                            const std::string& element_type,

--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -249,6 +249,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
    #include "ngraph/runtime/aligned_buffer.hpp"
    #include "ngraph/runtime/gpu/gpu_util.hpp"
    #include "ngraph/util.hpp"
+    #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
 )";

    string pch_header_source = writer.get_code();