Updated gpu cpp files with consistent use of namespaces (cosmetic) (#629)

* Updated namespace use in cpp files.

Updated gpu cpp files with consistent use of namespaces (cosmetic) (#629)
* Updated namespace use in cpp files.
b5467550 · Chris Sullivan · GitHub · a32fdab5 · b5467550 · b5467550
Unverified commit b5467550, authored Mar 13, 2018 by Chris Sullivan; committed by GitHub on Mar 13, 2018.
6 changed files
--- a/src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
@@ -19,25 +19,18 @@

 #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"

-namespace ngraph
+using namespace ngraph;
+
+runtime::gpu::CudaContextManager& runtime::gpu::CudaContextManager::instance()
 {
-    namespace runtime
-    {
-        namespace gpu
-        {
-            CudaContextManager& CudaContextManager::instance()
-            {
-                static CudaContextManager manager;
-                return manager;
-            }
+    static CudaContextManager manager;
+    return manager;
+}

-            CudaContextManager::CudaContextManager()
-            {
-                CUDA_SAFE_CALL(cuInit(0));
-                CUDA_SAFE_CALL(cuDeviceGet(&m_device, 0));
-                CUDA_SAFE_CALL(cuCtxCreate(&m_context, 0, m_device));
-                m_context_ptr = std::make_shared<CUcontext>(m_context);
-            }
-        }
-    }
+runtime::gpu::CudaContextManager::CudaContextManager()
+{
+    CUDA_SAFE_CALL(cuInit(0));
+    CUDA_SAFE_CALL(cuDeviceGet(&m_device, 0));
+    CUDA_SAFE_CALL(cuCtxCreate(&m_context, 0, m_device));
+    m_context_ptr = std::make_shared<CUcontext>(m_context);
 }
--- a/src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
@@ -20,46 +20,39 @@
 #include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
 #include "ngraph/runtime/gpu/gpu_util.hpp"

-namespace ngraph
+using namespace ngraph;
+
+std::shared_ptr<CUfunction> runtime::gpu::CudaFunctionBuilder::get(const std::string& name,
+                                                                   const std::string& kernel,
+                                                                   int number_of_options,
+                                                                   const char** options)
 {
-    namespace runtime
-    {
-        namespace gpu
-        {
-            std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
-                                                                 const std::string& kernel,
-                                                                 int number_of_options,
-                                                                 const char** options)
-            {
-                nvrtcProgram prog;
-                NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
-                                                   kernel.c_str(),
-                                                   "op.cu",
-                                                   0,      // numHeaders
-                                                   NULL,   // headers
-                                                   NULL)); // includeNames
+    nvrtcProgram prog;
+    NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
+                                       kernel.c_str(),
+                                       "op.cu",
+                                       0,      // numHeaders
+                                       NULL,   // headers
+                                       NULL)); // includeNames

-                nvrtcResult compile_result = nvrtcCompileProgram(prog, number_of_options, options);
+    nvrtcResult compile_result = nvrtcCompileProgram(prog, number_of_options, options);

-                if (compile_result != NVRTC_SUCCESS)
-                {
-                    throw std::runtime_error("compile error: \n" + kernel + "\n options");
-                }
+    if (compile_result != NVRTC_SUCCESS)
+    {
+        throw std::runtime_error("compile error: \n" + kernel + "\n options");
+    }

-                size_t ptx_size;
-                NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
-                char* ptx = new char[ptx_size];
-                NVRTC_SAFE_CALL(nvrtcGetPTX(
-                    prog,
+    size_t ptx_size;
+    NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
+    char* ptx = new char[ptx_size];
+    NVRTC_SAFE_CALL(
+        nvrtcGetPTX(prog,
                    ptx)); // Load the generated PTX and get a handle to the parent kernel.
-                NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
+    NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.

-                CUmodule module;
-                CUfunction function;
-                CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
-                CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
-                return std::make_shared<CUfunction>(function);
-            }
-        }
-    }
+    CUmodule module;
+    CUfunction function;
+    CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
+    CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
+    return std::make_shared<CUfunction>(function);
 }
--- a/src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
@@ -26,40 +26,31 @@

 static const std::string s_output_dir = "gpu_codegen";

-namespace ngraph
+using namespace ngraph;
+
+runtime::gpu::CudaFunctionPool& runtime::gpu::CudaFunctionPool::instance()
 {
-    namespace runtime
-    {
-        namespace gpu
-        {
-            CudaFunctionPool& CudaFunctionPool::instance()
-            {
-                static CudaFunctionPool pool;
-                return pool;
-            }
+    static CudaFunctionPool pool;
+    return pool;
+}

-            void CudaFunctionPool::set(const std::string& name, const std::string& kernel)
-            {
-                const char* opts[] = {"--gpu-architecture=compute_35",
-                                      "--relocatable-device-code=true"};
-                std::string filename =
-                    file_util::path_join(s_output_dir, "cuda_kernel_" + name + "_codegen.cu");
-                std::ofstream out(filename);
-                out << kernel;
-                out.close();
-                m_function_map.insert(
-                    {name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts)});
-            }
+void runtime::gpu::CudaFunctionPool::set(const std::string& name, const std::string& kernel)
+{
+    const char* opts[] = {"--gpu-architecture=compute_35", "--relocatable-device-code=true"};
+    std::string filename =
+        file_util::path_join(s_output_dir, "cuda_kernel_" + name + "_codegen.cu");
+    std::ofstream out(filename);
+    out << kernel;
+    out.close();
+    m_function_map.insert({name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts)});
+}

-            std::shared_ptr<CUfunction> CudaFunctionPool::get(const std::string& name)
-            {
-                auto it = m_function_map.find(name);
-                if (it != m_function_map.end())
-                {
-                    return (*it).second;
-                }
-                return nullptr;
-            }
-        }
+std::shared_ptr<CUfunction> runtime::gpu::CudaFunctionPool::get(const std::string& name)
+{
+    auto it = m_function_map.find(name);
+    if (it != m_function_map.end())
+    {
+        return (*it).second;
    }
+    return nullptr;
 }
--- a/src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
@@ -16,74 +16,67 @@
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
 #include "ngraph/codegen/code_writer.hpp"

-namespace ngraph
+using namespace ngraph;
+
+void runtime::gpu::CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
+                                                         const std::string& name,
+                                                         const std::string& data_type,
+                                                         const std::string& op,
+                                                         const size_t& num_inputs)
 {
-    namespace runtime
+    writer << "extern \"C\" __global__ void cuda_" << name << "(";
+    for (size_t i = 0; i < num_inputs; i++)
+    {
+        writer << data_type << "* in" << i << ", ";
+    }
+    writer << data_type << "* out,"
+           << "size_t n)\n";
+    writer << "{\n";
+    writer.indent++;
    {
-        namespace gpu
+        writer << "size_t tid = blockIdx.x * blockDim.x + threadIdx.x; \n";
+        writer << "if (tid < n)\n";
+        writer << "{\n";
+        writer.indent++;
        {
-            void CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
-                                                       const std::string& name,
-                                                       const std::string& data_type,
-                                                       const std::string& op,
-                                                       const size_t& num_inputs)
+            writer << "out[tid] = " << op << "(";
+            for (size_t i = 0; i < num_inputs - 1; i++)
            {
-                writer << "extern \"C\" __global__ void cuda_" << name << "(";
-                for (size_t i = 0; i < num_inputs; i++)
-                {
-                    writer << data_type << "* in" << i << ", ";
-                }
-                writer << data_type << "* out,"
-                       << "size_t n)\n";
-                writer << "{\n";
-                writer.indent++;
-                {
-                    writer << "size_t tid = blockIdx.x * blockDim.x + threadIdx.x; \n";
-                    writer << "if (tid < n)\n";
-                    writer << "{\n";
-                    writer.indent++;
-                    {
-                        writer << "out[tid] = " << op << "(";
-                        for (size_t i = 0; i < num_inputs - 1; i++)
-                        {
-                            writer << "in" << i << "[tid], ";
-                        }
-                        writer << "in" << num_inputs - 1 << "[tid]);\n";
-                    }
-                    writer.indent--;
-                    writer << "}\n";
-                }
-                writer.indent--;
-                writer << "}\n";
-
-                return;
+                writer << "in" << i << "[tid], ";
            }
+            writer << "in" << num_inputs - 1 << "[tid]);\n";
+        }
+        writer.indent--;
+        writer << "}\n";
+    }
+    writer.indent--;
+    writer << "}\n";

-            void CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
-                                                      const std::string& name,
-                                                      const std::string& data_type,
-                                                      const std::string& math_kernel,
-                                                      const size_t& num_inputs)
-            {
-                if (math_kernel.size())
-                {
-                    writer << "__device__ " << data_type << " " << name << "(";
-                    for (size_t i = 0; i < num_inputs - 1; i++)
-                    {
-                        writer << data_type << " x" << i << ", ";
-                    }
-                    writer << data_type << " x" << num_inputs - 1;
-                    writer << ")\n";
-                    writer << "{\n";
-                    writer.indent++;
-                    {
-                        writer << "return " + math_kernel << ";\n";
-                    }
-                    writer.indent--;
-                    writer << "}\n";
-                }
-                return;
-            }
+    return;
+}
+
+void runtime::gpu::CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
+                                                        const std::string& name,
+                                                        const std::string& data_type,
+                                                        const std::string& math_kernel,
+                                                        const size_t& num_inputs)
+{
+    if (math_kernel.size())
+    {
+        writer << "__device__ " << data_type << " " << name << "(";
+        for (size_t i = 0; i < num_inputs - 1; i++)
+        {
+            writer << data_type << " x" << i << ", ";
+        }
+        writer << data_type << " x" << num_inputs - 1;
+        writer << ")\n";
+        writer << "{\n";
+        writer.indent++;
+        {
+            writer << "return " + math_kernel << ";\n";
        }
+        writer.indent--;
+        writer << "}\n";
    }
+    return;
 }
--- a/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
@@ -20,26 +20,22 @@
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"

-namespace ngraph
+using namespace ngraph;
+void runtime::gpu::emit_broadcast(
+    void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count)
 {
-    namespace runtime
+    std::string name = "broadcast";
+    // Create an instance of nvrtcProgram with the code string.
+    if (CudaFunctionPool::instance().get(name) == nullptr)
    {
-        namespace gpu
-        {
-            void emit_broadcast(
-                void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count)
-            {
-                std::string name = "broadcast";
-                // Create an instance of nvrtcProgram with the code string.
-                if (CudaFunctionPool::instance().get(name) == nullptr)
-                {
-                    std::string kernel;
-                    std::string data_type("float");
+        std::string kernel;
+        std::string data_type("float");

-                    kernel = R"(
+        kernel = R"(
 extern "C" __global__
-void cuda_)" + name + "(" + data_type +
-                             "* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" + R"(
+void cuda_)" + name +
+                 "(" + data_type + "* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" +
+                 R"(
 {
    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    if(tid < n)
@@ -48,28 +44,25 @@ void cuda_)" + name + "(" + data_type +
        out[tid] = in[idx];
    }
 })";
-                    CudaFunctionPool::instance().set(name, kernel);
-                }
+        CudaFunctionPool::instance().set(name, kernel);
+    }

-                //convert runtime ptr to driver api ptr
-                CUdeviceptr d_ptr_in, d_ptr_out;
-                d_ptr_in = CUdeviceptr(in);
-                d_ptr_out = CUdeviceptr(out);
+    //convert runtime ptr to driver api ptr
+    CUdeviceptr d_ptr_in, d_ptr_out;
+    d_ptr_in = CUdeviceptr(in);
+    d_ptr_out = CUdeviceptr(out);

-                void* args_list[] = {&d_ptr_in, &d_ptr_out, &repeat_size, &repeat_times, &count};
-                CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
-                                              static_cast<unsigned int>(count),
-                                              1,
-                                              1, // grid dim
-                                              1,
-                                              1,
-                                              1, // block dim
-                                              0,
-                                              NULL, // shared mem and stream
-                                              args_list,
-                                              0));  // arguments
-                CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
-            }
-        }
-    }
+    void* args_list[] = {&d_ptr_in, &d_ptr_out, &repeat_size, &repeat_times, &count};
+    CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
+                                  static_cast<unsigned int>(count),
+                                  1,
+                                  1, // grid dim
+                                  1,
+                                  1,
+                                  1, // block dim
+                                  0,
+                                  NULL, // shared mem and stream
+                                  args_list,
+                                  0));  // arguments
+    CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
 }
--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp