Commit 1a91c924 authored by fenglei.tian's avatar fenglei.tian

Merge branch 'tfl/gpu_add_nvrtc' of github.com:NervanaSystems/private-ngraph-cpp…

Merge branch 'tfl/gpu_add_nvrtc' of github.com:NervanaSystems/private-ngraph-cpp into tfl/gpu_add_nvrtc
parents d23e5a70 e332d92b
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class Cuda_context_manager
{
public:
static Cuda_context_manager& Instance()
{
static Cuda_context_manager manager;
return pool;
}
Cuda_context_manager(Cuda_context_manager const&) = delete;
Cuda_context_manager(Cuda_context_manager&&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager const&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager &&) = delete;
std::shared_ptr<CUcontext> GetContext()
{
return context_ptr;
}
protected:
Cuda_context_manager()
{
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
context_ptr = std::make_shared<CUcontext>(context);
}
~Cuda_context_manager(){}
CUdevice cuDevice;
CUcontext context;
std::shared_ptr<CUcontext> context_ptr;
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class Cuda_function_builder
{
public:
static std::shared_ptr<CUfuction> get(std::string& kernel, std::string& name, int number_of_options, std::string options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compileResult = nvrtcCompileProgram(prog,
number_of_options,
options);
if (compileResult != NVRTC_SUCCESS) {
// size_t logSize;
// NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
// char *log = new char[logSize];
// NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
// std::cout << log << '\n';
// delete[] log;
throw std::runtime_error("compile error: \n" + kernel + "\n options" + options);
}
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char *ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name));
return std::make_shared<CUfunction>(function);
}
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class Cuda_function_pool
{
public:
static Cuda_function_pool& Instance()
{
static Cuda_function_pool pool;
return pool;
}
Cuda_function_pool(Cuda_function_pool const&) = delete;
Cuda_function_pool(Cuda_function_pool&&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool const&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool &&) = delete;
void Set(std::string& name, std::shared_ptr<CUfunction> function)
{
CUfunction_map.insert({name,function});
}
std::shared_ptr<CUfunction> Get(std::string& name)
{
auto it = CUfunction_map.find(name);
if(it != CUfunction_map.end())
{
return (*it).second;
}
return nullptr;
}
protected:
Cuda_function_pool(){}
~Cuda_function_pool(){}
std::unordered_map<std::string, std::shared_ptr<CUfunction>> CUfunction_map;
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class Cuda_kernel_builder
{
static std::string get_1_element_op(std::string& name, std::string& data_type, std::string& op, std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in, " + data_type + "* out, size_t n)\n"
+R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = " + op + "(in[tid]);\n"
+R"(}
})";
return;
}
static std::string get_2_element_op(std::string& name, std::string& data_type, std::string op, std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n"
+R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n"
+R"(}
})";
return;
}
static std::string get_n_element_op(std::string& name, std::string& data_type, std::vector<std::string> ops, std::string& kernel)
{
kernel = "";
return;
}
}
}
}
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <map>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include <nvrtc.h>
#include <cuda.h>
......@@ -22,47 +25,10 @@
#include <cublas_v2.h>
#include <cudnn_v7.h>
#include "ngraph/node.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/one_hot.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/util.hpp"
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cude_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_pool.hpp"
namespace ngraph
{
......@@ -77,69 +43,25 @@ namespace ngraph
void emit_abs(void* in, void* out, size_t count)
{
const char *op_abs = R"(
extern "C" __global__
void cuda_op_abs(float* in, float* out, size_t n)
{
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = fabsf(in[tid]);
}
})";
// Create an instance of nvrtcProgram with the code string.
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
op_abs,
"op_abs.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
const char *opts[] = {"--gpu-architecture=compute_35",
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if(Cuda_function_pool::Instance().get(name) == nullptr)
{
const char *opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
nvrtcResult compileResult = nvrtcCompileProgram(prog,
2,
opts);
size_t logSize;
NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
char *log = new char[logSize];
NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
std::cout << log << '\n';
delete[] log;
if (compileResult != NVRTC_SUCCESS) {
exit(1);
std::string kernel;
Cuda_kernel_builder::get_1_element_op(name, "float", "fabsf",kernel);
Cuda_function_pool::Instance().set(name, Cuda_function_builder(name, kernel, 2, opts));
}
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char *ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx)); // Destroy the program.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Load the generated PTX and get a handle to the parent kernel.
CUdevice cuDevice;
CUcontext context;
CUmodule module;
CUfunction cuda_op_abs_kernel;
CUDA_SAFE_CALL( cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&cuda_op_abs_kernel, module, "cuda_op_abs"));
//convert runtime ptr to driver api ptr
CUdeviceptr dPtrIn, dPtrOut;
dPtrIn = (CUdeviceptr)in;
dPtrOut = (CUdeviceptr)out;
void *argsList[] = {&dPtrIn, &dPtrOut, &count};
CUDA_SAFE_CALL(
cuLaunchKernel(cuda_op_abs_kernel,
cuLaunchKernel(cudCuda_function_pool::Instance().get(name).get(),
count ,1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
......
......@@ -16,6 +16,28 @@
#pragma once
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
namespace ngraph
{
namespace runtime
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment