Commit ee220ffb authored by fenglei.tian

fix bugs and apply clang

parent a574bdaf
......@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <string>
......@@ -27,7 +25,7 @@ namespace ngraph
{
namespace gpu
{
static CudaContextManager::CudaContextManager& instance()
CudaContextManager& CudaContextManager::instance()
{
static CudaContextManager manager;
return manager;
......
......@@ -14,11 +14,10 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
......@@ -27,21 +26,20 @@ namespace ngraph
{
namespace gpu
{
static std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel,
int number_of_options,
const char** options)
std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel,
int number_of_options,
const char** options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compile_result =
nvrtcCompileProgram(prog, number_of_options, options);
nvrtcResult compile_result = nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS)
{
......
......@@ -33,38 +33,7 @@ namespace ngraph
static std::shared_ptr<CUfunction> get(const std::string& name,
const std::string& kernel,
int number_of_options,
const char** options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compile_result =
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS)
{
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
size_t ptx_size;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
char* ptx = new char[ptx_size];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
const char** options);
};
}
}
......
......@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include <unordered_map>
......@@ -27,7 +25,7 @@ namespace ngraph
{
namespace gpu
{
static CudaFunctionPool::CudaFunctionPool& instance()
CudaFunctionPool& CudaFunctionPool::instance()
{
static CudaFunctionPool pool;
return pool;
......
......@@ -38,6 +38,7 @@ namespace ngraph
void set(std::string& name, std::shared_ptr<CUfunction> function);
std::shared_ptr<CUfunction> get(std::string& name);
protected:
CudaFunctionPool() {}
~CudaFunctionPool() {}
......
......@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph
......@@ -24,47 +22,48 @@ namespace ngraph
{
namespace gpu
{
static void CudaKernelBuilder::get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
void CudaKernelBuilder::get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type +
"* in, " + data_type + "* out, size_t n)\n" + R"({
void cuda_)" + name + "(" +
data_type + "* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] =)" + op + "(in[tid]);\n" +
R"(}
R"(}
})";
return;
}
static void CudaKernelBuilder::get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
void CudaKernelBuilder::get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void )" + name + "(" + data_type +
"* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" +
R"({
"* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" +
R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n" +
R"(}
out[tid] = in1[tid] )" + op +
"in2[tid]\n" +
R"(}
})";
return;
}
static void CudaKernelBuilder::get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string>& ops,
std::string& kernel)
void CudaKernelBuilder::get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string>& ops,
std::string& kernel)
{
kernel = "";
return;
......
......@@ -17,6 +17,7 @@
#pragma once
#include <string>
#include <vector>
namespace ngraph
{
......@@ -41,6 +42,7 @@ namespace ngraph
const std::string& data_type,
const std::vector<std::string>& ops,
std::string& kernel);
};
}
}
}
......@@ -28,44 +28,38 @@ namespace ngraph
{
namespace gpu
{
namespace cuda
void emit_abs(void* in, void* out, size_t count)
{
namespace kernel
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
{
void emit_abs(void* in, void* out, size_t count)
{
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
{
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
CudaKernelBuilder::get_1_element_op(name, "float", "fabsf", kernel);
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
CudaKernelBuilder::get_1_element_op(name, "float", "fabsf", kernel);
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr
CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out;
//convert runtime ptr to driver api ptr
CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out;
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
args_list,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
}
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
args_list,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
}
}
......
......@@ -25,13 +25,7 @@ namespace ngraph
{
namespace gpu
{
namespace cuda
{
namespace kernel
{
void emit_abs(void* in, void* out, size_t count);
}
}
void emit_abs(void* in, void* out, size_t count);
}
}
}
......@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name()
<< ", (void*) " << out[0].get_name() << ", count);\n";
writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name() << ", (void*) "
<< out[0].get_name() << ", count);\n";
writer.indent--;
writer << "}\n";
}
......
......@@ -16,6 +16,11 @@
#pragma once
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment