Commit ca07b7f0 authored by fenglei.tian

fix compiling bugs

parent 1a91c924
......@@ -25,39 +25,41 @@
using namespace std;
using namespace ngraph;
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define NVRTC_SAFE_CALL(x) \
do \
{ \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) \
{ \
std::cerr << "\nerror: " #x " failed with error " << nvrtcGetErrorString(result) \
<< '\n'; \
exit(1); \
} \
} while (0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do \
{ \
CUresult result = x; \
if (result != CUDA_SUCCESS) \
{ \
const char* msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " << msg << '\n'; \
exit(1); \
} \
} while (0)
runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
EntryPoint compiled_function)
: m_external_function(external_function)
, m_compiled_function(compiled_function)
{
CUdevice cuDevice;
CUcontext context;
CUmodule module;
CUfunction cuda_op_abs_kernel;
CUDA_SAFE_CALL( cuInit(0));
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
cublasStatus_t cublasStatus = cublasCreate(&m_cublas_handle);
......
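Both error-checking macros wrap their bodies in do { ... } while (0) so that each invocation expands to a single statement. A minimal sketch of why that matters, assuming CUDA_SAFE_CALL as defined above is in scope; the init_driver function and its flag are illustrative only, not part of this commit:

#include <cuda.h>
#include <iostream>

void init_driver(bool initialize)
{
    // Because CUDA_SAFE_CALL expands to exactly one statement, it composes
    // safely with an unbraced if/else; a bare { ... } block followed by ';'
    // would break the else branch.
    if (initialize)
        CUDA_SAFE_CALL(cuInit(0));
    else
        std::cerr << "skipping CUDA driver initialization\n";
}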
......@@ -17,6 +17,7 @@
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -26,24 +27,20 @@ namespace ngraph
{
class Cuda_context_manager
{
public:
public:
static Cuda_context_manager& Instance()
{
static Cuda_context_manager manager;
return pool;
return manager;
}
Cuda_context_manager(Cuda_context_manager const&) = delete;
Cuda_context_manager(Cuda_context_manager&&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager const&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager &&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager&&) = delete;
std::shared_ptr<CUcontext> GetContext()
{
return context_ptr;
}
protected:
std::shared_ptr<CUcontext> GetContext() { return context_ptr; }
protected:
Cuda_context_manager()
{
CUDA_SAFE_CALL(cuInit(0));
......@@ -51,11 +48,11 @@ namespace ngraph
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
context_ptr = std::make_shared<CUcontext>(context);
}
~Cuda_context_manager(){}
~Cuda_context_manager() {}
CUdevice cuDevice;
CUcontext context;
std::shared_ptr<CUcontext> context_ptr;
}
};
}
}
}
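A minimal usage sketch of the singleton; the fully qualified namespace is an assumption (the enclosing namespaces are truncated by the hunk), and the caller is assumed to include this header so CUDA_SAFE_CALL is available:

#include <memory>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"

void bind_shared_context()
{
    // Instance() lazily creates one CUcontext on first use; every later call
    // returns the same shared_ptr, so all NVRTC-compiled kernels run in it.
    auto& manager = ngraph::runtime::gpu::Cuda_context_manager::Instance(); // namespace assumed
    std::shared_ptr<CUcontext> ctx = manager.GetContext();
    CUDA_SAFE_CALL(cuCtxSetCurrent(*ctx));
}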
......@@ -18,8 +18,8 @@
#include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -29,44 +29,49 @@ namespace ngraph
{
class Cuda_function_builder
{
public:
static std::shared_ptr<CUfuction> get(std::string& kernel, std::string& name, int number_of_options, std::string options)
public:
static std::shared_ptr<CUfunction> Get(std::string& kernel,
std::string& name,
int number_of_options,
const char** options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compileResult =
nvrtcCompileProgram(prog, number_of_options, options);
nvrtcResult compileResult = nvrtcCompileProgram(prog,
number_of_options,
options);
if (compileResult != NVRTC_SUCCESS)
{
// size_t logSize;
// NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
// char *log = new char[logSize];
// NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
// std::cout << log << '\n';
// delete[] log;
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
if (compileResult != NVRTC_SUCCESS) {
// size_t logSize;
// NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
// char *log = new char[logSize];
// NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
// std::cout << log << '\n';
// delete[] log;
throw std::runtime_error("compile error: \n" + kernel + "\n options" + options);
}
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char* ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char *ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name));
return std::make_shared<CUfunction>(function);
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}
};
}
}
}
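Get() runs the NVRTC pipeline end to end: compile the CUDA C string to PTX, load the PTX with cuModuleLoadDataEx, and resolve the kernel with cuModuleGetFunction. A sketch of a direct call; the kernel source and option strings are illustrative, and the name passed in must match the __global__ symbol because it is forwarded to cuModuleGetFunction:

std::shared_ptr<CUfunction> build_noop_kernel()
{
    std::string kernel = R"(
extern "C" __global__ void cuda_op_noop(float* in, float* out, size_t n) {}
)";
    std::string name = "cuda_op_noop";
    const char* opts[] = {"--gpu-architecture=compute_35",
                          "--relocatable-device-code=true"};
    // Throws std::runtime_error if NVRTC fails to compile the source.
    return Cuda_function_builder::Get(kernel, name, 2, opts);
}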
......@@ -17,6 +17,9 @@
#pragma once
#include <string>
#include <unordered_map>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -26,7 +29,7 @@ namespace ngraph
{
class Cuda_function_pool
{
public:
public:
static Cuda_function_pool& Instance()
{
static Cuda_function_pool pool;
......@@ -36,29 +39,28 @@ namespace ngraph
Cuda_function_pool(Cuda_function_pool const&) = delete;
Cuda_function_pool(Cuda_function_pool&&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool const&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool &&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool&&) = delete;
void Set(std::string& name, std::shared_ptr<CUfunction> function)
{
CUfunction_map.insert({name,function});
CUfunction_map.insert({name, function});
}
std::shared_ptr<CUfunction> Get(std::string& name)
{
auto it = CUfunction_map.find(name);
if(it != CUfunction_map.end())
if (it != CUfunction_map.end())
{
return (*it).second;
}
return nullptr;
}
protected:
Cuda_function_pool(){}
~Cuda_function_pool(){}
protected:
Cuda_function_pool() {}
~Cuda_function_pool() {}
std::unordered_map<std::string, std::shared_ptr<CUfunction>> CUfunction_map;
}
};
}
}
}
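Together with the builder, the pool gives a compile-once cache keyed by kernel name. A sketch of the intended pattern, assuming both headers are included; the helper name is illustrative:

std::shared_ptr<CUfunction> get_or_build(std::string& name,
                                         std::string& kernel_source,
                                         const char** opts,
                                         int num_opts)
{
    // Look in the cache first; fall back to NVRTC compilation only on a miss.
    std::shared_ptr<CUfunction> func = Cuda_function_pool::Instance().Get(name);
    if (func == nullptr)
    {
        func = Cuda_function_builder::Get(kernel_source, name, num_opts, opts);
        Cuda_function_pool::Instance().Set(name, func);
    }
    return func;
}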
......@@ -26,12 +26,16 @@ namespace ngraph
{
class Cuda_kernel_builder
{
static std::string get_1_element_op(std::string& name, std::string& data_type, std::string& op, std::string& kernel)
public:
static void Get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in, " + data_type + "* out, size_t n)\n"
+R"({
void cuda_op_)" + name + "(" +
data_type + "* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
......@@ -41,27 +45,34 @@ out[tid] = " + op + "(in[tid]);\n"
return;
}
static std::string get_2_element_op(std::string& name, std::string& data_type, std::string op, std::string& kernel)
static void Get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n"
+R"({
void cuda_op_)" + name + "(" +
data_type + "* in1, " + data_type + "* in2, " + data_type +
"* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n"
+R"(}
out[tid] = in1[tid] )" + op + "in2[tid]\n" +
R"(}
})";
return;
}
static std::string get_n_element_op(std::string& name, std::string& data_type, std::vector<std::string> ops, std::string& kernel)
static void Get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string> ops,
std::string& kernel)
{
kernel = "";
return;
}
}
};
}
}
}
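For reference, Get_1_element_op("abs", "float", "fabsf", kernel) assembles a source string roughly like the following, reconstructed from the template above (whose middle is truncated by the hunk):

extern "C" __global__
void cuda_op_abs(float* in, float* out, size_t n)
{
    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n)
    {
        out[tid] = fabsf(in[tid]);
    }
}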
......@@ -17,18 +17,16 @@
#include <algorithm>
#include <map>
#include <nvrtc.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cudnn_v7.h>
#include <nvrtc.h>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cude_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph
{
......@@ -40,42 +38,48 @@ namespace ngraph
{
namespace kernel
{
void emit_abs(void* in, void* out, size_t count)
{
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if(Cuda_function_pool::Instance().get(name) == nullptr)
// Create an instance of nvrtcProgram with the code string.
if (Cuda_function_pool::Instance().Get(name) == nullptr)
{
const char *opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
Cuda_kernel_builder::get_1_element_op(name, "float", "fabsf",kernel);
Cuda_function_pool::Instance().set(name, Cuda_function_builder(name, kernel, 2, opts));
Cuda_kernel_builder::Get_1_element_op(name, "float", "fabsf", kernel);
Cuda_function_pool::Instance().Set(
name, Cuda_function_builder::Get(name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr
CUdeviceptr dPtrIn, dPtrOut;
dPtrIn = (CUdeviceptr)in;
dPtrOut = (CUdeviceptr)out;
void *argsList[] = {&dPtrIn, &dPtrOut, &count};
void* argsList[] = {&dPtrIn, &dPtrOut, &count};
CUDA_SAFE_CALL(
cuLaunchKernel(cudCuda_function_pool::Instance().get(name).get(),
count ,1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
argsList, 0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
cuLaunchKernel(*Cuda_function_pool::Instance().Get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
argsList,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
void emit_broadcast(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
}
......@@ -83,61 +87,59 @@ namespace ngraph
// For the reference kernel this is gpud on, see ngraph/runtime/kernel/concat.hpp.
//
void emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
{
}
void emit_replace_slice(
codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
void emit_replace_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void emit_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order)
{
}
void emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
}
}
}
}
......
......@@ -37,12 +37,12 @@ namespace ngraph
const Shape& out_shape,
const AxisSet& broadcast_axes);
void emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
const size_t concatenation_axis);
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
const size_t concatenation_axis);
void emit_replace_slice(codegen::CodeWriter& writer,
const std::string& element_type,
......@@ -64,19 +64,19 @@ namespace ngraph
const Coordinate& upper_bounds,
const Strides& strides);
void emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order);
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order);
void emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes);
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes);
}
}
}
......
......@@ -17,20 +17,20 @@
#include <algorithm>
#include <cmath>
#include <cublas_v2.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <cudnn_v7.h>
#include <cudnn_v7.h>
#include <iostream>
#include <nvrtc.h>
#include <nvrtc.h>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <vector>
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cudnn_v7.h>
#include "ngraph/node.hpp"
#include "ngraph/ops/broadcast.hpp"
......@@ -48,9 +48,9 @@
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/util.hpp"
using namespace std;
......@@ -80,30 +80,31 @@ using namespace ngraph;
} while (0)
void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() << ", (void*) " << out[0].get_name() << ", count);\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name()
<< ", (void*) " << out[0].get_name() << ", count);\n";
writer.indent--;
writer << "}\n";
}
void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -142,9 +143,9 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitConcat(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
......@@ -205,12 +206,12 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 2))
{
// GEMM Call
if(arg0_shape[0] != out[0].get_shape()[0] || // m
arg1_shape[1] != out[0].get_shape()[1] || // n
arg0_shape[1] != arg1_shape[0]) // k
{
throw std::runtime_error("input and output shape is not correct for dot;");
}
if (arg0_shape[0] != out[0].get_shape()[0] || // m
arg1_shape[1] != out[0].get_shape()[1] || // n
arg0_shape[1] != arg1_shape[0]) // k
{
throw std::runtime_error("input and output shape is not correct for dot;");
}
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
writer << "static const float alpha = 1.0;\n";
......@@ -243,66 +244,66 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
}
void runtime::gpu::GPU_Emitter::EmitDivide(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitEqual(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitGreater(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitGreaterEq(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLess(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLessEq(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLog(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -341,9 +342,9 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitMinimum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -382,10 +383,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitNegative(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -424,60 +425,60 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitNotEqual(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSelect(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSubtract(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitBroadcast(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitConvert(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitConstant(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
auto reshape = static_cast<const op::Reshape*>(n);
writer << "{ // " << n->get_name() << "\n";
......@@ -505,7 +506,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 1\n";
writer.indent++;
writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n";
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n";
writer.indent--;
writer << "}\n";
}
......@@ -542,50 +543,50 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
}
void runtime::gpu::GPU_Emitter::EmitFunctionCall(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
void runtime::gpu::GPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSign(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSlice(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMultiply(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -624,130 +625,130 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitExp(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSin(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSinh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCos(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCosh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitTan(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitTanh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAsin(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAcos(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAtan(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitPower(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReplaceSlice(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitOneHot(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCeiling(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitFloor(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSqrt(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -786,52 +787,52 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitConvolution(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitNot(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMaxPool(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReverse(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReduceWindow(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSelectAndScatter(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
......@@ -94,10 +94,10 @@
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
using namespace std;
using namespace ngraph;
......@@ -249,17 +249,17 @@ void runtime::gpu::GPU_ExternalFunction::compile()
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
)";
string pch_header_source = writer.get_code();
// writer += R"(
// using namespace ngraph;
// using namespace std;
//)";
// writer += R"(
// using namespace ngraph;
// using namespace std;
//)";
if (m_emit_timing)
{
......
......@@ -16,27 +16,28 @@
#pragma once
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define NVRTC_SAFE_CALL(x) \
do \
{ \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) \
{ \
throw std::runtime_error("\nerror: " #x " failed with error " + \
std::string(nvrtcGetErrorString(result))); \
} \
} while (0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do \
{ \
CUresult result = x; \
if (result != CUDA_SUCCESS) \
{ \
const char* msg; \
cuGetErrorName(result, &msg); \
throw std::runtime_error("\nerror: " #x " failed with error " + std::string(msg)); \
} \
} while (0)
namespace ngraph
{
......
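Unlike the copies in gpu_call_frame.cpp, which call exit(1), these header macros surface failures as std::runtime_error, so callers can handle or report them. A minimal sketch, assuming this header is included:

#include <cuda.h>
#include <iostream>
#include <stdexcept>

void synchronize_or_report()
{
    try
    {
        CUDA_SAFE_CALL(cuCtxSynchronize());
    }
    catch (const std::runtime_error& e)
    {
        // The macro folds the failing expression and the cuGetErrorName text
        // into the exception message.
        std::cerr << e.what() << '\n';
    }
}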