Commit ee220ffb authored by fenglei.tian's avatar fenglei.tian

fix bugs and apply clang-format

parent a574bdaf
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -27,7 +25,7 @@ namespace ngraph ...@@ -27,7 +25,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static CudaContextManager::CudaContextManager& instance() CudaContextManager& CudaContextManager::instance()
{ {
static CudaContextManager manager; static CudaContextManager manager;
return manager; return manager;
......
...@@ -14,11 +14,10 @@ ...@@ -14,11 +14,10 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <string> #include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp" #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
...@@ -27,7 +26,7 @@ namespace ngraph ...@@ -27,7 +26,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name, std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel, const std::string& kernel,
int number_of_options, int number_of_options,
const char** options) const char** options)
...@@ -40,8 +39,7 @@ namespace ngraph ...@@ -40,8 +39,7 @@ namespace ngraph
NULL, // headers NULL, // headers
NULL)); // includeNames NULL)); // includeNames
nvrtcResult compile_result = nvrtcResult compile_result = nvrtcCompileProgram(prog, number_of_options, options);
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS) if (compile_result != NVRTC_SUCCESS)
{ {
......
...@@ -33,38 +33,7 @@ namespace ngraph ...@@ -33,38 +33,7 @@ namespace ngraph
static std::shared_ptr<CUfunction> get(const std::string& name, static std::shared_ptr<CUfunction> get(const std::string& name,
const std::string& kernel, const std::string& kernel,
int number_of_options, int number_of_options,
const char** options) const char** options);
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compile_result =
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS)
{
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
size_t ptx_size;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
char* ptx = new char[ptx_size];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}; };
} }
} }
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -27,7 +25,7 @@ namespace ngraph ...@@ -27,7 +25,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static CudaFunctionPool::CudaFunctionPool& instance() CudaFunctionPool& CudaFunctionPool::instance()
{ {
static CudaFunctionPool pool; static CudaFunctionPool pool;
return pool; return pool;
......
...@@ -38,6 +38,7 @@ namespace ngraph ...@@ -38,6 +38,7 @@ namespace ngraph
void set(std::string& name, std::shared_ptr<CUfunction> function); void set(std::string& name, std::shared_ptr<CUfunction> function);
std::shared_ptr<CUfunction> get(std::string& name); std::shared_ptr<CUfunction> get(std::string& name);
protected: protected:
CudaFunctionPool() {} CudaFunctionPool() {}
~CudaFunctionPool() {} ~CudaFunctionPool() {}
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#pragma once
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph namespace ngraph
...@@ -24,15 +22,15 @@ namespace ngraph ...@@ -24,15 +22,15 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static void CudaKernelBuilder::get_1_element_op(const std::string& name, void CudaKernelBuilder::get_1_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
{ {
kernel = R"( kernel = R"(
extern "C" __global__ extern "C" __global__
void cuda_)" + name + "(" + data_type + void cuda_)" + name + "(" +
"* in, " + data_type + "* out, size_t n)\n" + R"({ data_type + "* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x; size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n) if(tid < n)
{ {
...@@ -42,7 +40,7 @@ out[tid] =)" + op + "(in[tid]);\n" + ...@@ -42,7 +40,7 @@ out[tid] =)" + op + "(in[tid]);\n" +
return; return;
} }
static void CudaKernelBuilder::get_2_element_op(const std::string& name, void CudaKernelBuilder::get_2_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
...@@ -55,13 +53,14 @@ void )" + name + "(" + data_type + ...@@ -55,13 +53,14 @@ void )" + name + "(" + data_type +
size_t tid = blockIdx.x * blockDim.x + threadIdx.x; size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n) if(tid < n)
{ {
out[tid] = in1[tid] )" + op + "in2[tid]\n" + out[tid] = in1[tid] )" + op +
"in2[tid]\n" +
R"(} R"(}
})"; })";
return; return;
} }
static void CudaKernelBuilder::get_n_element_op(const std::string& name, void CudaKernelBuilder::get_n_element_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::vector<std::string>& ops, const std::vector<std::string>& ops,
std::string& kernel) std::string& kernel)
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#pragma once #pragma once
#include <string> #include <string>
#include <vector>
namespace ngraph namespace ngraph
{ {
...@@ -41,6 +42,7 @@ namespace ngraph ...@@ -41,6 +42,7 @@ namespace ngraph
const std::string& data_type, const std::string& data_type,
const std::vector<std::string>& ops, const std::vector<std::string>& ops,
std::string& kernel); std::string& kernel);
};
} }
} }
} }
...@@ -27,10 +27,6 @@ namespace ngraph ...@@ -27,10 +27,6 @@ namespace ngraph
namespace runtime namespace runtime
{ {
namespace gpu namespace gpu
{
namespace cuda
{
namespace kernel
{ {
void emit_abs(void* in, void* out, size_t count) void emit_abs(void* in, void* out, size_t count)
{ {
...@@ -67,6 +63,4 @@ namespace ngraph ...@@ -67,6 +63,4 @@ namespace ngraph
} }
} }
} }
}
}
} }
...@@ -24,14 +24,8 @@ namespace ngraph ...@@ -24,14 +24,8 @@ namespace ngraph
namespace runtime namespace runtime
{ {
namespace gpu namespace gpu
{
namespace cuda
{
namespace kernel
{ {
void emit_abs(void* in, void* out, size_t count); void emit_abs(void* in, void* out, size_t count);
} }
} }
}
}
} }
...@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer, ...@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
writer.indent++; writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n"; writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n"; writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name() << ", (void*) "
<< ", (void*) " << out[0].get_name() << ", count);\n"; << out[0].get_name() << ", count);\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
......
...@@ -16,6 +16,11 @@ ...@@ -16,6 +16,11 @@
#pragma once #pragma once
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment