Commit b5467550 (unverified), authored by Chris Sullivan, committed by GitHub

Updated gpu cpp files with consistent use of namespaces (cosmetic) (#629)

* Updated namespace use in cpp files.
parent a32fdab5
......@@ -19,25 +19,18 @@
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
namespace ngraph
using namespace ngraph;
runtime::gpu::CudaContextManager& runtime::gpu::CudaContextManager::instance()
{
namespace runtime
{
namespace gpu
{
// Accessor for the shared CudaContextManager. The object is a function-local
// static: it is constructed the first time this function runs, and every call
// returns a reference to that same object.
CudaContextManager& CudaContextManager::instance()
{
    static CudaContextManager s_instance;
    return s_instance;
}
}
CudaContextManager::CudaContextManager()
{
// Constructs the manager: initializes the CUDA driver API, looks up device
// ordinal 0 and creates a context on it. Every driver call is wrapped in
// CUDA_SAFE_CALL, which is defined elsewhere (presumably it checks the
// returned status -- confirm against its definition).
runtime::gpu::CudaContextManager::CudaContextManager()
{
// Driver API initialization comes first; the argument is the flags value (0).
CUDA_SAFE_CALL(cuInit(0));
// Store the handle of device ordinal 0 in m_device.
CUDA_SAFE_CALL(cuDeviceGet(&m_device, 0));
// Create a context on m_device with flags 0 and store its handle in m_context.
CUDA_SAFE_CALL(cuCtxCreate(&m_context, 0, m_device));
// m_context_ptr manages a separately allocated copy of the m_context handle
// value; releasing the shared_ptr frees that copy only.
m_context_ptr = std::make_shared<CUcontext>(m_context);
}
}
}
}
......@@ -20,17 +20,13 @@
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
using namespace ngraph;
std::shared_ptr<CUfunction> runtime::gpu::CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel,
int number_of_options,
const char** options)
{
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
......@@ -49,8 +45,8 @@ namespace ngraph
size_t ptx_size;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
char* ptx = new char[ptx_size];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
NVRTC_SAFE_CALL(
nvrtcGetPTX(prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
......@@ -59,7 +55,4 @@ namespace ngraph
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}
}
}
......@@ -26,40 +26,31 @@
static const std::string s_output_dir = "gpu_codegen";
namespace ngraph
using namespace ngraph;
runtime::gpu::CudaFunctionPool& runtime::gpu::CudaFunctionPool::instance()
{
namespace runtime
{
namespace gpu
{
// Accessor for the shared CudaFunctionPool. The pool is a function-local
// static: it is constructed the first time this function runs, and every call
// returns a reference to that same object.
CudaFunctionPool& CudaFunctionPool::instance()
{
    static CudaFunctionPool s_pool;
    return s_pool;
}
}
void CudaFunctionPool::set(const std::string& name, const std::string& kernel)
{
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
void runtime::gpu::CudaFunctionPool::set(const std::string& name, const std::string& kernel)
{
const char* opts[] = {"--gpu-architecture=compute_35", "--relocatable-device-code=true"};
std::string filename =
file_util::path_join(s_output_dir, "cuda_kernel_" + name + "_codegen.cu");
std::ofstream out(filename);
out << kernel;
out.close();
m_function_map.insert(
{name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts)});
}
m_function_map.insert({name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts)});
}
std::shared_ptr<CUfunction> CudaFunctionPool::get(const std::string& name)
{
// Looks up `name` in m_function_map.
// Returns the stored std::shared_ptr<CUfunction> when an entry exists, and
// nullptr when no entry with that name is present.
std::shared_ptr<CUfunction> runtime::gpu::CudaFunctionPool::get(const std::string& name)
{
    const auto entry = m_function_map.find(name);
    if (entry == m_function_map.end())
    {
        // Nothing is registered under this name.
        return nullptr;
    }
    return entry->second;
}
}
}
}
......@@ -16,18 +16,14 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/codegen/code_writer.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
void CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
using namespace ngraph;
void runtime::gpu::CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
const std::string& name,
const std::string& data_type,
const std::string& op,
const size_t& num_inputs)
{
{
writer << "extern \"C\" __global__ void cuda_" << name << "(";
for (size_t i = 0; i < num_inputs; i++)
{
......@@ -57,14 +53,14 @@ namespace ngraph
writer << "}\n";
return;
}
}
void CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
void runtime::gpu::CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
const std::string& name,
const std::string& data_type,
const std::string& math_kernel,
const size_t& num_inputs)
{
{
if (math_kernel.size())
{
writer << "__device__ " << data_type << " " << name << "(";
......@@ -83,7 +79,4 @@ namespace ngraph
writer << "}\n";
}
return;
}
}
}
}
......@@ -20,15 +20,10 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
void emit_broadcast(
using namespace ngraph;
void runtime::gpu::emit_broadcast(
void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count)
{
{
std::string name = "broadcast";
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
......@@ -38,8 +33,9 @@ namespace ngraph
kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type +
"* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" + R"(
void cuda_)" + name +
"(" + data_type + "* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" +
R"(
{
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
......@@ -69,7 +65,4 @@ void cuda_)" + name + "(" + data_type +
args_list,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment