Commit ee220ffb authored by fenglei.tian's avatar fenglei.tian

fix bugs and apply clang

parent a574bdaf
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -27,7 +25,7 @@ namespace ngraph ...@@ -27,7 +25,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static CudaContextManager::CudaContextManager& instance() CudaContextManager& CudaContextManager::instance()
{ {
static CudaContextManager manager; static CudaContextManager manager;
return manager; return manager;
......
...@@ -14,11 +14,10 @@ ...@@ -14,11 +14,10 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <string> #include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp" #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
...@@ -27,21 +26,20 @@ namespace ngraph ...@@ -27,21 +26,20 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name, std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel, const std::string& kernel,
int number_of_options, int number_of_options,
const char** options) const char** options)
{ {
nvrtcProgram prog; nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(), kernel.c_str(),
"op.cu", "op.cu",
0, // numHeaders 0, // numHeaders
NULL, // headers NULL, // headers
NULL)); // includeNames NULL)); // includeNames
nvrtcResult compile_result = nvrtcResult compile_result = nvrtcCompileProgram(prog, number_of_options, options);
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS) if (compile_result != NVRTC_SUCCESS)
{ {
......
...@@ -33,38 +33,7 @@ namespace ngraph ...@@ -33,38 +33,7 @@ namespace ngraph
static std::shared_ptr<CUfunction> get(const std::string& name, static std::shared_ptr<CUfunction> get(const std::string& name,
const std::string& kernel, const std::string& kernel,
int number_of_options, int number_of_options,
const char** options) const char** options);
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compile_result =
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS)
{
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
size_t ptx_size;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
char* ptx = new char[ptx_size];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}; };
} }
} }
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -27,7 +25,7 @@ namespace ngraph ...@@ -27,7 +25,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static CudaFunctionPool::CudaFunctionPool& instance() CudaFunctionPool& CudaFunctionPool::instance()
{ {
static CudaFunctionPool pool; static CudaFunctionPool pool;
return pool; return pool;
......
...@@ -38,6 +38,7 @@ namespace ngraph ...@@ -38,6 +38,7 @@ namespace ngraph
void set(std::string& name, std::shared_ptr<CUfunction> function); void set(std::string& name, std::shared_ptr<CUfunction> function);
std::shared_ptr<CUfunction> get(std::string& name); std::shared_ptr<CUfunction> get(std::string& name);
protected: protected:
CudaFunctionPool() {} CudaFunctionPool() {}
~CudaFunctionPool() {} ~CudaFunctionPool() {}
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph namespace ngraph
...@@ -24,47 +22,48 @@ namespace ngraph ...@@ -24,47 +22,48 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static void CudaKernelBuilder::get_1_element_op(const std::string& name, void CudaKernelBuilder::get_1_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
{ {
kernel = R"( kernel = R"(
extern "C" __global__ extern "C" __global__
void cuda_)" + name + "(" + data_type + void cuda_)" + name + "(" +
"* in, " + data_type + "* out, size_t n)\n" + R"({ data_type + "* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x; size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n) if(tid < n)
{ {
out[tid] =)" + op + "(in[tid]);\n" + out[tid] =)" + op + "(in[tid]);\n" +
R"(} R"(}
})"; })";
return; return;
} }
static void CudaKernelBuilder::get_2_element_op(const std::string& name, void CudaKernelBuilder::get_2_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
{ {
kernel = R"( kernel = R"(
extern "C" __global__ extern "C" __global__
void )" + name + "(" + data_type + void )" + name + "(" + data_type +
"* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" + "* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" +
R"({ R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x; size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n) if(tid < n)
{ {
out[tid] = in1[tid] )" + op + "in2[tid]\n" + out[tid] = in1[tid] )" + op +
R"(} "in2[tid]\n" +
R"(}
})"; })";
return; return;
} }
static void CudaKernelBuilder::get_n_element_op(const std::string& name, void CudaKernelBuilder::get_n_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::vector<std::string>& ops, const std::vector<std::string>& ops,
std::string& kernel) std::string& kernel)
{ {
kernel = ""; kernel = "";
return; return;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#pragma once #pragma once
#include <string> #include <string>
#include <vector>
namespace ngraph namespace ngraph
{ {
...@@ -41,6 +42,7 @@ namespace ngraph ...@@ -41,6 +42,7 @@ namespace ngraph
const std::string& data_type, const std::string& data_type,
const std::vector<std::string>& ops, const std::vector<std::string>& ops,
std::string& kernel); std::string& kernel);
};
} }
} }
} }
...@@ -28,44 +28,38 @@ namespace ngraph ...@@ -28,44 +28,38 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
namespace cuda void emit_abs(void* in, void* out, size_t count)
{ {
namespace kernel std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
{ {
void emit_abs(void* in, void* out, size_t count) const char* opts[] = {"--gpu-architecture=compute_35",
{ "--relocatable-device-code=true"};
std::string name = "abs"; std::string kernel;
// Create an instance of nvrtcProgram with the code string. CudaKernelBuilder::get_1_element_op(name, "float", "fabsf", kernel);
if (CudaFunctionPool::instance().get(name) == nullptr) CudaFunctionPool::instance().set(
{ name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
const char* opts[] = {"--gpu-architecture=compute_35", }
"--relocatable-device-code=true"};
std::string kernel;
CudaKernelBuilder::get_1_element_op(name, "float", "fabsf", kernel);
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr //convert runtime ptr to driver api ptr
CUdeviceptr d_ptr_in, d_ptr_out; CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in; d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out; d_ptr_out = (CUdeviceptr)out;
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count}; void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(), CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count, count,
1, 1,
1, // grid dim 1, // grid dim
1, 1,
1, 1,
1, // block dim 1, // block dim
0, 0,
NULL, // shared mem and stream NULL, // shared mem and stream
args_list, args_list,
0)); // arguments 0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
}
} }
} }
} }
......
...@@ -25,13 +25,7 @@ namespace ngraph ...@@ -25,13 +25,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
namespace cuda void emit_abs(void* in, void* out, size_t count);
{
namespace kernel
{
void emit_abs(void* in, void* out, size_t count);
}
}
} }
} }
} }
...@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer, ...@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
writer.indent++; writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n"; writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n"; writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name() << ", (void*) "
<< ", (void*) " << out[0].get_name() << ", count);\n"; << out[0].get_name() << ", count);\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
......
...@@ -16,6 +16,11 @@ ...@@ -16,6 +16,11 @@
#pragma once #pragma once
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment