Commit 8e78e24c authored by Fenglei Tian's avatar Fenglei Tian

add cpp files

parent ba29c1f2
......@@ -241,6 +241,10 @@ endif()
runtime/gpu/gpu_tensor_view_wrapper.cpp
runtime/gpu/gpu_util.cpp
runtime/gpu/gpu_cuda_kernel_emitters.cpp
runtime/gpu/gpu_cuda_kernel_builder.cpp
runtime/gpu/gpu_cuda_function_builder.cpp
runtime/gpu/gpu_cuda_function_pool.cpp
runtime/gpu/gpu_cuda_context_manager.cpp
)
set_property(SOURCE codegen/compiler.cpp APPEND_STRING PROPERTY COMPILE_DEFINITIONS
"CUDA_HEADER_PATHS=\"${CUDA_INCLUDE_DIRS}\";")
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
static CudaContextManager::CudaContextManager& instance()
{
static CudaContextManager manager;
return manager;
}
CudaContextManager::CudaContextManager()
{
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&m_device, 0));
CUDA_SAFE_CALL(cuCtxCreate(&m_context, 0, m_device));
m_context_ptr = std::make_shared<CUcontext>(m_context);
}
}
}
}
......@@ -30,12 +30,7 @@ namespace ngraph
class CudaContextManager
{
public:
static CudaContextManager& instance()
{
static CudaContextManager manager;
return manager;
}
static CudaContextManager& instance();
CudaContextManager(CudaContextManager const&) = delete;
CudaContextManager(CudaContextManager&&) = delete;
CudaContextManager& operator=(CudaContextManager const&) = delete;
......@@ -43,13 +38,7 @@ namespace ngraph
std::shared_ptr<CUcontext> get_context() { return m_context_ptr; }
protected:
CudaContextManager()
{
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&m_device, 0));
CUDA_SAFE_CALL(cuCtxCreate(&m_context, 0, m_device));
m_context_ptr = std::make_shared<CUcontext>(m_context);
}
CudaContextManager();
~CudaContextManager() {}
CUdevice m_device;
CUcontext m_context;
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
static std::shared_ptr<CUfunction> CudaFunctionBuilder::get(const std::string& name,
const std::string& kernel,
int number_of_options,
const char** options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compile_result =
nvrtcCompileProgram(prog, number_of_options, options);
if (compile_result != NVRTC_SUCCESS)
{
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
size_t ptx_size;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptx_size));
char* ptx = new char[ptx_size];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include <unordered_map>
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
static CudaFunctionPool::CudaFunctionPool& instance()
{
static CudaFunctionPool pool;
return pool;
}
void CudaFunctionPool::set(std::string& name, std::shared_ptr<CUfunction> function)
{
m_function_map.insert({name, function});
}
std::shared_ptr<CUfunction> CudaFunctionPool::get(std::string& name)
{
auto it = m_function_map.find(name);
if (it != m_function_map.end())
{
return (*it).second;
}
return nullptr;
}
}
}
}
......@@ -30,32 +30,14 @@ namespace ngraph
class CudaFunctionPool
{
public:
static CudaFunctionPool& instance()
{
static CudaFunctionPool pool;
return pool;
}
static CudaFunctionPool& instance();
CudaFunctionPool(CudaFunctionPool const&) = delete;
CudaFunctionPool(CudaFunctionPool&&) = delete;
CudaFunctionPool& operator=(CudaFunctionPool const&) = delete;
CudaFunctionPool& operator=(CudaFunctionPool&&) = delete;
void set(std::string& name, std::shared_ptr<CUfunction> function)
{
m_function_map.insert({name, function});
}
std::shared_ptr<CUfunction> get(std::string& name)
{
auto it = m_function_map.find(name);
if (it != m_function_map.end())
{
return (*it).second;
}
return nullptr;
}
void set(std::string& name, std::shared_ptr<CUfunction> function);
std::shared_ptr<CUfunction> get(std::string& name);
protected:
CudaFunctionPool() {}
~CudaFunctionPool() {}
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
static void CudaKernelBuilder::get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type +
"* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] =)" + op + "(in[tid]);\n" +
R"(}
})";
return;
}
static void CudaKernelBuilder::get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void )" + name + "(" + data_type +
"* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" +
R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n" +
R"(}
})";
return;
}
static void CudaKernelBuilder::get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string>& ops,
std::string& kernel)
{
kernel = "";
return;
}
}
}
}
......@@ -30,49 +30,17 @@ namespace ngraph
static void get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type +
"* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] =)" + op + "(in[tid]);\n" +
R"(}
})";
return;
}
std::string& kernel);
static void get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void )" + name + "(" + data_type +
"* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n" +
R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n" +
R"(}
})";
return;
}
std::string& kernel);
static void get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string>& ops,
std::string& kernel)
{
kernel = "";
return;
}
};
std::string& kernel);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment