Commit 8eb31b78 authored by fenglei.tian

clang format

parent d172456c

@@ -104,27 +104,27 @@ namespace ngraph
    {
        namespace gpu
        {
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Abs)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name()
                       << ", (void*) " << out[0].get_name() << ", count);\n";
                writer.indent--;
                writer << "}\n";
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Add)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -144,148 +144,151 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[1].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }
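
Add, Maximum, Minimum, Multiply, Negative and Sqrt all emit this same cudnnOpTensor pattern; only the op type in the (elided) opTensorDesc setup and the alpha1/alpha2 coefficients differ. The raw-string block that creates the descriptors is cut off by the hunk boundary above, so the following is only a hedged, standalone sketch of what the generated code amounts to for Add; in particular, describing the flat buffer as a 1x1x1xcount NCHW tensor is an assumption made for illustration, not a quote of the elided code.

// Illustration only: a standalone version of the kind of code the Add emitter writes out.
#include <cudnn.h>

void add_with_cudnn(cudnnHandle_t cudnn_handle, const float* a, const float* b, float* out, int count)
{
    if (count == 0) return;

    float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;

    // Assumed layout: treat the flat buffer as a 1 x 1 x 1 x count NCHW tensor.
    cudnnTensorDescriptor_t descriptor;
    cudnnCreateTensorDescriptor(&descriptor);
    cudnnSetTensor4dDescriptor(descriptor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 1, count);

    cudnnOpTensorDescriptor_t opTensorDesc;
    cudnnCreateOpTensorDescriptor(&opTensorDesc);
    cudnnSetOpTensorDescriptor(opTensorDesc, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN);

    // out = alpha1 * a + alpha2 * b (element-wise); beta = 0 overwrites out.
    cudnnOpTensor(cudnn_handle, opTensorDesc,
                  &alpha1, descriptor, a,
                  &alpha2, descriptor, b,
                  &beta, descriptor, out);

    cudnnDestroyOpTensorDescriptor(opTensorDesc);
    cudnnDestroyTensorDescriptor(descriptor);
}

The other ops reuse the same call with different coefficients, e.g. Negative uses alpha1 = -1, alpha2 = 0 and passes the input for both tensor operands.
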
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Dot)
            {
                const ngraph::op::Dot* dot = static_cast<const ngraph::op::Dot*>(node);
                const Shape& arg0_shape = args[0].get_shape();
                const Shape& arg1_shape = args[1].get_shape();
                if (arg0_shape.empty() || arg1_shape.empty())
                {
                    auto& first = (arg0_shape.empty() ? args[0] : args[1]);
                    auto& second = (arg0_shape.empty() ? args[1] : args[0]);
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "int count = " << second.get_size() << ";\n";
                    writer << "if(count == 0) return;\n";
                    writer << "cublasScopy("
                           << "cublas_handle,"
                           << "count ," << second.get_name() << ","
                           << "1," << out[0].get_name() << ", 1);\n";
                    writer << "cublasSscal("
                           << "cublas_handle,"
                           << "count ," << first.get_name() << "," << out[0].get_name()
                           << ", 1);\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
                }

                // return early if the output size is 0
                if (out[0].get_size() == 0)
                {
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "return;\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
                }

                // set the output to 0 if either input size is 0
                if (args[0].get_size() == 0 || args[1].get_size() == 0)
                {
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "runtime::gpu::cuda_memset(" << out[0].get_name() << ", 0, "
                           << out[0].get_size() << " * sizeof(float));\n";
                    writer << "return;\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
                }

                if ((arg0_shape.size() == 1) && (arg1_shape.size() == 1))
                {
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "cublasSdot("
                           << "cublas_handle," << arg0_shape[0] << "," << args[0].get_name() << ","
                           << "1," << args[1].get_name() << ","
                           << "1," << out[0].get_name() << ");\n";
                    writer.indent--;
                    writer << "}\n";
                }
                else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 1))
                {
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "const float alpha = 1.0;\n";
                    writer << "const float beta = 0;\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
                    writer << "cublasSgemv("
                           << "cublas_handle,"
                           << "CUBLAS_OP_T," << arg0_shape[0] << "," << arg0_shape[1] << ","
                           << "&alpha," // Alpha
                           << args[0].get_name() << "," << arg0_shape[1] << ","
                           << args[1].get_name() << ","
                           << "1,"
                           << "&beta," // beta
                           << out[0].get_name() << ","
                           << "1);\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
                    writer.indent--;
                    writer << "}\n";
                }
                else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 2))
                {
                    // GEMM call
                    if (arg0_shape[0] != out[0].get_shape()[0] || // m
                        arg1_shape[1] != out[0].get_shape()[1] || // n
                        arg0_shape[1] != arg1_shape[0])           // k
                    {
                        throw std::runtime_error("input and output shapes are not correct for dot");
                    }
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "const float alpha = 1.0;\n";
                    writer << "const float beta = 0.0;\n";
                    writer << "int m = " << arg0_shape[0] << ";\n";
                    writer << "int n = " << arg1_shape[1] << ";\n";
                    // k is the shared inner dimension: columns of arg0 (== rows of arg1)
                    writer << "int k = " << arg0_shape[1] << ";\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
                    writer << "cublasSgemm("
                           << "cublas_handle,"
                           << "CUBLAS_OP_N,"
                           << "CUBLAS_OP_N,"
                           << "n,"
                           << "m,"
                           << "k,"
                           << "&alpha," // Alpha
                           << args[1].get_name() << ","
                           << "n," << args[0].get_name() << ","
                           << "k,"
                           << "&beta," // beta
                           << out[0].get_name() << ","
                           << "n);\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
                    writer.indent--;
                    writer << "}\n";
                }
                else
                {
                    throw std::runtime_error(node->get_name() +
                                             " with more than 2D is not implemented.");
                }
            }
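
The 2-D by 2-D branch leans on the standard trick for driving column-major cuBLAS from row-major buffers: swap the two operands and the m/n dimensions so the library effectively computes C^T = B^T * A^T, which is the row-major C = A * B. A self-contained sketch of that call pattern (illustration only, not nGraph code; the matrix values are made up):

// Row-major C = A * B via column-major cuBLAS, same argument pattern as the emitted call.
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int m = 2, k = 3, n = 2;
    float A[m * k] = {1, 2, 3, 4, 5, 6};      // row-major m x k
    float B[k * n] = {7, 8, 9, 10, 11, 12};   // row-major k x n
    float C[m * n] = {0};                     // row-major m x n

    float *dA, *dB, *dC;
    cudaMalloc(&dA, sizeof(A));
    cudaMalloc(&dB, sizeof(B));
    cudaMalloc(&dC, sizeof(C));
    cudaMemcpy(dA, A, sizeof(A), cudaMemcpyHostToDevice);
    cudaMemcpy(dB, B, sizeof(B), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);
    const float alpha = 1.0f, beta = 0.0f;
    // Pass B first and swap m/n: cuBLAS computes C^T = B^T * A^T in its
    // column-major view, which is exactly the row-major A * B.
    cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                &alpha, dB, n, dA, k, &beta, dC, n);

    cudaMemcpy(C, dC, sizeof(C), cudaMemcpyDeviceToHost);
    // Expected row-major result: [58 64; 139 154]
    printf("%g %g\n%g %g\n", C[0], C[1], C[2], C[3]);

    cublasDestroy(handle);
    cudaFree(dA);
    cudaFree(dB);
    cudaFree(dC);
    return 0;
}
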
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Maximum)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -305,26 +308,26 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[1].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Minimum)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -344,26 +347,26 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[1].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Negative)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = -1.0, alpha2 = 0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -383,164 +386,167 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Broadcast)
            {
                auto broadcast = static_cast<const ngraph::op::Broadcast*>(node);
                auto arg_shape = args[0].get_shape();
                auto result_shape = out[0].get_shape();
                auto& axes = broadcast->get_broadcast_axes();
                // broadcast axes set is empty: just copy the input
                if (axes.empty())
                {
                    writer << "{ // " << node->get_name() << " \n";
                    writer.indent++;
                    writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", "
                           << args[0].get_name() << ", " << out[0].get_size() << " * "
                           << out[0].get_element_type().size() << ");\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
                }

                // handled case: a single broadcast axis, or axes that can be grouped into one
                // consecutive block (e.g. {0,1}, {1,2} or {1,2,3})
                vector<int> axes_v;
                std::copy(axes.begin(), axes.end(), std::back_inserter(axes_v));
                std::sort(axes_v.begin(), axes_v.end());
                bool is_one_axes = true;
                if (axes.size() != 1)
                {
                    for (int i = 1; i < axes_v.size(); i++)
                    {
                        if (axes_v[i] != axes_v[i - 1] + 1)
                        {
                            is_one_axes = false;
                            break;
                        }
                    }
                }
                if (is_one_axes)
                {
                    int repeat_times = 1;
                    for (int i = 0; i < axes_v.size(); i++)
                    {
                        repeat_times *= result_shape[axes_v[i]];
                    }

                    int repeat_size = 1;
                    for (int i = *axes_v.rbegin() + 1; i < result_shape.size(); i++)
                    {
                        repeat_size *= result_shape[i];
                    }

                    writer << "{ // " << node->get_name() << " \n";
                    writer.indent++;
                    writer << "runtime::gpu::emit_broadcast(" << args[0].get_name() << ", "
                           << out[0].get_name() << ", " << repeat_size << ", " << repeat_times
                           << ", " << out[0].get_size() << ");\n";
                    writer.indent--;
                    writer << "}\n";
                }
                else
                {
                    throw std::runtime_error(node->get_name() + " is not implemented.");
                }
            }
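
For the supported case (one consecutive block of broadcast axes), the loops above reduce the operation to "tile repeat_size contiguous elements repeat_times times". A small worked example with concrete numbers, using plain standard types instead of nGraph's Shape/AxisSet (illustration only):

// Broadcasting an input of shape {4} to result shape {2, 3, 4} with broadcast axes {0, 1}.
#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    std::vector<size_t> result_shape{2, 3, 4};
    std::vector<int> axes_v{0, 1}; // already sorted and consecutive

    int repeat_times = 1;
    for (int axis : axes_v)
        repeat_times *= result_shape[axis]; // 2 * 3 = 6 copies of the input

    int repeat_size = 1;
    for (size_t i = *axes_v.rbegin() + 1; i < result_shape.size(); i++)
        repeat_size *= result_shape[i]; // 4 contiguous elements per copy

    assert(repeat_times == 6 && repeat_size == 4);
    // emit_broadcast then tiles the 4-element input 6 times into the 24-element output.
    return 0;
}
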
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Constant)
            {
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Reshape)
            {
                auto reshape = static_cast<const op::Reshape*>(node);
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                auto arg_shape = args[0].get_shape();
                auto arg_rank = arg_shape.size();
                auto result_shape = out[0].get_shape();
                auto& result_element_type = out[0].get_element_type();
                auto input_order = reshape->get_input_order();
                bool same_layout = is_sorted(input_order.begin(), input_order.end());
                size_t result_shape_product = 1;
                for (auto i : result_shape)
                {
                    result_shape_product *= i;
                }
                // If there is no layout change, or we are just going from 1^n to 1^m or a
                // zero-size tensor, we can just copy.
                if (same_layout || result_shape_product < 2)
                {
                    writer << "{ // " << node->get_name() << " 1\n";
                    writer.indent++;
                    writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", "
                           << args[0].get_name() << ", " << out[0].get_size() << " * "
                           << out[0].get_element_type().size() << ");\n";
                    writer.indent--;
                    writer << "}\n";
                }
                // If there *is* a layout change in the 2D case, we transpose the input.
                else if (arg_rank == 2)
                {
                    // TODO Assert arg0_shape[0] == arg1_shape[0]?
                    writer << "{ // " << node->get_name() << "\n";
                    writer.indent++;
                    writer << "const float alpha = 1.0;\n";
                    writer << "const float beta = 0;\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
                    writer << "cublasSgeam("
                           << "cublas_handle,"
                           << "CUBLAS_OP_T,"
                           << "CUBLAS_OP_T," << arg_shape[0] << "," << arg_shape[1] << ","
                           << "&alpha," // Alpha
                           << args[0].get_name() << "," << arg_shape[1] << ","
                           << "&beta," // beta
                           << args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name()
                           << "," << result_shape[1] << ");\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
                    writer.indent--;
                    writer << "}\n";
                }
                // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
                else
                {
                    throw runtime_error(
                        "Axis permutation in reshape is not implemented yet for tensors with "
                        "rank>2");
                }
                writer.indent--;
                writer << "}\n";
            }
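
The 2-D branch uses cublasSgeam purely as a transpose: with beta = 0 the second operand contributes nothing, so the output is just op(A) = A^T. A standalone sketch of the same argument pattern on a 2x3 row-major matrix (illustration only, not nGraph code):

// Transpose a row-major 2x3 matrix into a row-major 3x2 matrix with cublasSgeam.
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int r0 = 2, r1 = 3;                 // input shape (r0, r1), row-major
    float A[r0 * r1] = {1, 2, 3, 4, 5, 6};
    float C[r1 * r0] = {0};                   // output shape (r1, r0), row-major

    float *dA, *dC;
    cudaMalloc(&dA, sizeof(A));
    cudaMalloc(&dC, sizeof(C));
    cudaMemcpy(dA, A, sizeof(A), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);
    const float alpha = 1.0f, beta = 0.0f;
    cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST);
    // Same argument pattern as the emitted call: m = r0, n = r1,
    // lda = ldb = r1, ldc = r0 (= result_shape[1]); beta = 0 ignores the B term.
    cublasSgeam(handle, CUBLAS_OP_T, CUBLAS_OP_T, r0, r1,
                &alpha, dA, r1, &beta, dA, r1, dC, r0);

    cudaMemcpy(C, dC, sizeof(C), cudaMemcpyDeviceToHost);
    // Expected rows: 1 4 / 2 5 / 3 6
    for (int i = 0; i < r1; i++)
        printf("%g %g\n", C[i * r0 + 0], C[i * r0 + 1]);

    cublasDestroy(handle);
    cudaFree(dA);
    cudaFree(dC);
    return 0;
}
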
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::FunctionCall)
            {
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Multiply)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -560,26 +566,26 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[1].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
            {
                writer << "{ // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
                writer << "if(count == 0) return;\n";
                writer += R"(
float alpha1 = 1.0, alpha2 = 0, beta = 0;
cudnnTensorDescriptor_t descriptor;
cudnnCreateTensorDescriptor(&descriptor);

@@ -599,17 +605,17 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           CUDNN_NOT_PROPAGATE_NAN);
)";
                writer << "cudnnOpTensor(cudnn_handle,"
                       << "opTensorDesc,"
                       << "&alpha1,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&alpha2,"
                       << "descriptor," << args[0].get_name() << ","
                       << "&beta,"
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
            }
        }
    }
}

@@ -28,7 +28,7 @@
    emit<op_name>(GPU_ExternalFunction * external_function,                                        \
                  codegen::CodeWriter & writer,                                                    \
                  const ngraph::Node* node,                                                        \
                  const std::vector<GPU_TensorViewWrapper>& args,                                  \
                  const std::vector<GPU_TensorViewWrapper>& out)
namespace ngraph
{

@@ -169,7 +169,8 @@ static const runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
    {TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Divide>},
    {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>},
    {TI(ngraph::op::GetOutputElement),
     &runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
    {TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>},
    {TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>},
    {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>},

@@ -216,13 +217,17 @@ static const runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
    {TI(ngraph::op::Result), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
    {TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
    {TI(ngraph::op::SelectAndScatter),
     &runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
    {TI(ngraph::op::AvgPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
    {TI(ngraph::op::AvgPoolBackprop),
     &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
    {TI(ngraph::op::Pad), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>},
    {TI(ngraph::op::BatchNorm), &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>},
    {TI(ngraph::op::BatchNormBackprop),
     &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
    {TI(ngraph::op::MaxPoolBackprop),
     &runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
    {TI(ngraph::op::Product), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>},
    {TI(ngraph::op::Max), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
    {TI(ngraph::op::Min), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
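
The dispatcher table maps an op's run-time type to the matching GPU_Emitter::emit<Op> specialization. The OpMap and TI definitions are not part of this hunk, so the following is only a toy, self-contained sketch of the presumed pattern (std::type_index keys mapping to plain function pointers); the Node/Abs/Add stand-in types and emit_* functions here are invented for illustration.

// Sketch of a type_index -> emitter-function dispatch table, with toy types.
#include <iostream>
#include <stdexcept>
#include <typeindex>
#include <unordered_map>

struct Node { virtual ~Node() = default; };
struct Abs : Node {};
struct Add : Node {};

using EmitFn = void (*)(const Node&);
using OpMap = std::unordered_map<std::type_index, EmitFn>;

#define TI(x) std::type_index(typeid(x))

static void emit_abs(const Node&) { std::cout << "emit Abs\n"; }
static void emit_add(const Node&) { std::cout << "emit Add\n"; }

static const OpMap dispatcher{
    {TI(Abs), &emit_abs},
    {TI(Add), &emit_add},
};

void emit(const Node& node)
{
    // Look up the emitter by the node's dynamic type and invoke it.
    auto it = dispatcher.find(std::type_index(typeid(node)));
    if (it == dispatcher.end())
        throw std::runtime_error("unsupported op");
    it->second(node);
}

int main()
{
    Add add;
    emit(add); // prints "emit Add"
    return 0;
}
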