Commit 8db26428 authored by Fenglei Tian

update gpu_emitter to use templates

parent ad58cb29
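For context, this commit replaces the long list of per-op `EmitXxx` member functions with a single `emit<OP>` function template plus explicit specializations, dispatched through a map keyed on `std::type_index`. Below is a minimal, self-contained sketch of that pattern; the type names (`Emitter`, `Node`, `Abs`, `Add`, `Tan`) are illustrative stand-ins, not the actual nGraph classes:

```cpp
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <typeindex>
#include <unordered_map>

// Stand-ins for ngraph::Node and the op classes (illustrative only).
struct Node
{
    virtual ~Node() = default;
    virtual std::string name() const = 0;
};
struct Abs : Node
{
    std::string name() const override { return "Abs_0"; }
};
struct Add : Node
{
    std::string name() const override { return "Add_0"; }
};
struct Tan : Node
{
    std::string name() const override { return "Tan_0"; }
};

struct Emitter
{
    // Primary template: compiles for every op, throws for ops that
    // have no specialization yet.
    template <typename OP>
    static void emit(const Node* node)
    {
        throw std::runtime_error(node->name() + " is not implemented.");
    }
};

// One explicit specialization per implemented op replaces the old
// EmitAbs/EmitAdd/... member functions.
template <>
void Emitter::emit<Abs>(const Node* node)
{
    std::cout << "{ // " << node->name() << "\n}\n";
}
template <>
void Emitter::emit<Add>(const Node* node)
{
    std::cout << "{ // " << node->name() << "\n}\n";
}

// Dispatch table: the node's runtime type selects the instantiation.
static const std::unordered_map<std::type_index, std::function<void(const Node*)>> dispatcher{
    {std::type_index(typeid(Abs)), &Emitter::emit<Abs>},
    {std::type_index(typeid(Add)), &Emitter::emit<Add>},
    {std::type_index(typeid(Tan)), &Emitter::emit<Tan>}, // unimplemented: throws if reached
};

int main()
{
    Abs node;
    dispatcher.at(std::type_index(typeid(node)))(&node); // prints "{ // Abs_0 ..."
    return 0;
}
```

With this shape, supporting a new op means adding one specialization and one dispatcher entry rather than editing the class definition and every call site.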
@@ -28,21 +28,69 @@
 #include <vector>
 #include "ngraph/node.hpp"
+#include "ngraph/ops/abs.hpp"
+#include "ngraph/ops/acos.hpp"
+#include "ngraph/ops/add.hpp"
+#include "ngraph/ops/allreduce.hpp"
+#include "ngraph/ops/asin.hpp"
+#include "ngraph/ops/atan.hpp"
+#include "ngraph/ops/avg_pool.hpp"
+#include "ngraph/ops/batch_norm.hpp"
 #include "ngraph/ops/broadcast.hpp"
+#include "ngraph/ops/ceiling.hpp"
 #include "ngraph/ops/concat.hpp"
 #include "ngraph/ops/constant.hpp"
+#include "ngraph/ops/convert.hpp"
 #include "ngraph/ops/convolution.hpp"
+#include "ngraph/ops/cos.hpp"
+#include "ngraph/ops/cosh.hpp"
+#include "ngraph/ops/divide.hpp"
 #include "ngraph/ops/dot.hpp"
+#include "ngraph/ops/equal.hpp"
+#include "ngraph/ops/exp.hpp"
+#include "ngraph/ops/floor.hpp"
 #include "ngraph/ops/function_call.hpp"
 #include "ngraph/ops/get_output_element.hpp"
+#include "ngraph/ops/greater.hpp"
+#include "ngraph/ops/greater_eq.hpp"
+#include "ngraph/ops/less.hpp"
+#include "ngraph/ops/less_eq.hpp"
+#include "ngraph/ops/log.hpp"
+#include "ngraph/ops/max.hpp"
 #include "ngraph/ops/max_pool.hpp"
+#include "ngraph/ops/maximum.hpp"
+#include "ngraph/ops/min.hpp"
+#include "ngraph/ops/minimum.hpp"
+#include "ngraph/ops/multiply.hpp"
+#include "ngraph/ops/negative.hpp"
+#include "ngraph/ops/not.hpp"
+#include "ngraph/ops/not_equal.hpp"
 #include "ngraph/ops/one_hot.hpp"
+#include "ngraph/ops/op.hpp"
+#include "ngraph/ops/pad.hpp"
+#include "ngraph/ops/parameter.hpp"
+#include "ngraph/ops/power.hpp"
+#include "ngraph/ops/product.hpp"
 #include "ngraph/ops/reduce.hpp"
+#include "ngraph/ops/reduce_window.hpp"
+#include "ngraph/ops/relu.hpp"
+#include "ngraph/ops/remainder.hpp"
 #include "ngraph/ops/replace_slice.hpp"
 #include "ngraph/ops/reshape.hpp"
+#include "ngraph/ops/result.hpp"
 #include "ngraph/ops/reverse.hpp"
+#include "ngraph/ops/select.hpp"
+#include "ngraph/ops/select_and_scatter.hpp"
+#include "ngraph/ops/sign.hpp"
+#include "ngraph/ops/sin.hpp"
+#include "ngraph/ops/sinh.hpp"
 #include "ngraph/ops/slice.hpp"
+#include "ngraph/ops/softmax.hpp"
+#include "ngraph/ops/sqrt.hpp"
+#include "ngraph/ops/subtract.hpp"
 #include "ngraph/ops/sum.hpp"
+#include "ngraph/ops/tan.hpp"
+#include "ngraph/ops/tanh.hpp"
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
 #include "ngraph/runtime/gpu/gpu_emitter.hpp"
 #include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
@@ -50,20 +98,16 @@
 using namespace std;
 using namespace ngraph;
 
-void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-}
-
-void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    writer << "{ // " << n->get_name() << "\n";
+namespace ngraph
+{
+namespace runtime
+{
+namespace gpu
+{
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Abs)
+{
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -73,12 +117,10 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Add)
 {
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -114,17 +156,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitConcat(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-}
-
-void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Dot)
 {
     const ngraph::op::Dot* dot = static_cast<const ngraph::op::Dot*>(n);
     const Shape& arg0_shape = args[0].get_shape();
@@ -133,7 +166,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     {
         auto& first = (arg0_shape.empty() ? args[0] : args[1]);
         auto& second = (arg0_shape.empty() ? args[1] : args[0]);
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "int count = " << second.get_size() << ";\n";
         writer << "if(count == 0) return;\n";
@@ -152,7 +185,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     //return if output size is 0;
     if (out[0].get_size() == 0)
     {
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "return;\n";
         writer.indent--;
@@ -163,7 +196,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     //set output to 0 if input size is 0
     if (args[0].get_size() == 0 || args[1].get_size() == 0)
    {
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "runtime::gpu::cuda_memset(" << out[0].get_name() << ", 0, " << out[0].get_size()
                << " * sizeof(float));\n";
@@ -175,7 +208,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     if ((arg0_shape.size() == 1) && (arg1_shape.size() == 1))
     {
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "cublasSdot("
               << "cublas_handle," << arg0_shape[0] << "," << args[0].get_name() << ","
@@ -186,7 +219,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     }
     else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 1))
     {
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "const float alpha = 1.0;\n";
         writer << "const float beta = 0;\n";
@@ -213,7 +246,7 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
         {
             throw std::runtime_error("input and output shape is not correct for dot;");
         }
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "const float alpha = 1.0;\n";
         writer << "const float beta = 0.0;\n";
@@ -241,73 +274,14 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
     }
     else
     {
-        throw std::runtime_error(n->get_name() + " with more then 2D is not implemented.");
+        throw std::runtime_error(node->get_name() + " with more then 2D is not implemented.");
     }
 }
 
-void runtime::gpu::GPU_Emitter::EmitDivide(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitEqual(codegen::CodeWriter& writer,
-                                          const ngraph::Node* n,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitGreater(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitGreaterEq(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitLess(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitLessEq(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitLog(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Maximum)
 {
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -343,12 +317,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitMinimum(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Minimum)
 {
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -384,13 +356,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitNegative(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Negative)
 {
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -426,36 +395,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitNotEqual(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSelect(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSubtract(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitBroadcast(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Broadcast)
 {
     auto broadcast = static_cast<const ngraph::op::Broadcast*>(n);
     auto arg_shape = args[0].get_shape();
@@ -465,7 +406,7 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
     //broadcast axes is empty, do a copy
     if (axes.empty())
     {
-        writer << "{ // " << n->get_name() << " \n";
+        writer << "{ // " << node->get_name() << " \n";
         writer.indent++;
         writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
                << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
@@ -504,7 +445,7 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
             repeat_size *= result_shape[i];
         }
-        writer << "{ // " << n->get_name() << " \n";
+        writer << "{ // " << node->get_name() << " \n";
         writer.indent++;
         writer << "runtime::gpu::emit_broadcast(" << args[0].get_name() << ", " << out[0].get_name()
                << ", " << repeat_size << ", " << repeat_times << ", " << out[0].get_size()
@@ -514,33 +455,20 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
     }
     else
     {
-        throw std::runtime_error(n->get_name() + " is not implemented.");
+        throw std::runtime_error(node->get_name() + " is not implemented.");
     }
 }
 
-void runtime::gpu::GPU_Emitter::EmitConvert(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Constant)
 {
-    throw std::runtime_error(n->get_name() + " is not implemented.");
 }
 
-void runtime::gpu::GPU_Emitter::EmitConstant(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-}
-
-void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Reshape)
 {
     auto reshape = static_cast<const op::Reshape*>(n);
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     auto arg_shape = args[0].get_shape();
     auto arg_rank = arg_shape.size();
@@ -561,7 +489,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
     // we can just copy.
     if (same_layout || result_shape_product < 2)
     {
-        writer << "{ // " << n->get_name() << " 1\n";
+        writer << "{ // " << node->get_name() << " 1\n";
         writer.indent++;
         writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
                << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
@@ -572,7 +500,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
     else if (arg_rank == 2)
     {
         // TODO Assert arg0_shape[0] == arg1_shape[0]?
-        writer << "{ // " << n->get_name() << "\n";
+        writer << "{ // " << node->get_name() << "\n";
         writer.indent++;
         writer << "const float alpha = 1.0;\n";
         writer << "const float beta = 0;\n";
@@ -600,53 +528,15 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitFunctionCall(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::FunctionCall)
 {
 }
 
-void runtime::gpu::GPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSign(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSlice(codegen::CodeWriter& writer,
-                                          const ngraph::Node* n,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSum(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitMultiply(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Multiply)
 {
-    writer << "{ // " << n->get_name() << "\n";
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -682,133 +572,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer << "}\n";
 }
 
-void runtime::gpu::GPU_Emitter::EmitExp(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSin(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSinh(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitCos(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitCosh(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitTan(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitTanh(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitAsin(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitAcos(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitAtan(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitPower(codegen::CodeWriter& writer,
-                                          const ngraph::Node* n,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitReplaceSlice(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitOneHot(codegen::CodeWriter& writer,
-                                           const ngraph::Node* n,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                           const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitCeiling(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitFloor(codegen::CodeWriter& writer,
-                                          const ngraph::Node* n,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                          const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSqrt(codegen::CodeWriter& writer,
-                                         const ngraph::Node* n,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    writer << "{ // " << n->get_name() << "\n";
+template <>
+void CPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
+{
+    writer << "{ // " << node->get_name() << "\n";
     writer.indent++;
     writer << "int count = " << out[0].get_size() << ";\n";
     writer << "if(count == 0) return;\n";
@@ -843,54 +610,6 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
     writer.indent--;
     writer << "}\n";
 }
-
-void runtime::gpu::GPU_Emitter::EmitConvolution(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitNot(codegen::CodeWriter& writer,
-                                        const ngraph::Node* n,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                        const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitMaxPool(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitReverse(codegen::CodeWriter& writer,
-                                            const ngraph::Node* n,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-                                            const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitReduceWindow(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
-
-void runtime::gpu::GPU_Emitter::EmitSelectAndScatter(
-    codegen::CodeWriter& writer,
-    const ngraph::Node* n,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
-    const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
-{
-    throw std::runtime_error(n->get_name() + " is not implemented.");
-}
+}
+}
 }
@@ -24,12 +24,12 @@
 #include "ngraph/runtime/gpu/gpu_external_function.hpp"
 #include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
 
-#define EMITTER_DECL(E)                                                                            \
-    E(codegen::CodeWriter& writer,                                                                 \
-      const ngraph::Node* n,                                                                      \
-      const std::vector<ngraph::runtime::gpu::GPU_TensorViewWrapper>& args,                       \
-      const std::vector<ngraph::runtime::gpu::GPU_TensorViewWrapper>& out)
+#define EMITTER_DECL(op_name)                                                                      \
+    emit<op_name>(GPU_ExternalFunction * external_function,                                        \
+                  codegen::CodeWriter & writer,                                                    \
+                  const ngraph::Node* node,                                                       \
+                  const std::vector<GPU_TensorViewWrapper>& args,                                 \
+                  const std::vector<GPU_TensorViewWrapper>& out)
 
 namespace ngraph
 {
     namespace runtime
@@ -39,59 +39,31 @@ namespace ngraph
             class GPU_Emitter
             {
             public:
-                static void EMITTER_DECL(EmitNop);
-                static void EMITTER_DECL(EmitAdd);
-                static void EMITTER_DECL(EmitDot);
-                static void EMITTER_DECL(EmitMultiply);
-                static void EMITTER_DECL(EmitGetOutputElement);
-                static void EMITTER_DECL(EmitXLAGetTupleElement);
-                static void EMITTER_DECL(EmitTuple);
-                static void EMITTER_DECL(EmitAbs);
-                static void EMITTER_DECL(EmitConcat);
-                static void EMITTER_DECL(EmitDivide);
-                static void EMITTER_DECL(EmitEqual);
-                static void EMITTER_DECL(EmitGreater);
-                static void EMITTER_DECL(EmitGreaterEq);
-                static void EMITTER_DECL(EmitLess);
-                static void EMITTER_DECL(EmitLessEq);
-                static void EMITTER_DECL(EmitLog);
-                static void EMITTER_DECL(EmitMaximum);
-                static void EMITTER_DECL(EmitMinimum);
-                static void EMITTER_DECL(EmitNegative);
-                static void EMITTER_DECL(EmitNotEqual);
-                static void EMITTER_DECL(EmitSelect);
-                static void EMITTER_DECL(EmitSubtract);
-                static void EMITTER_DECL(EmitBroadcast);
-                static void EMITTER_DECL(EmitConvert);
-                static void EMITTER_DECL(EmitConstant);
-                static void EMITTER_DECL(EmitReshape);
-                static void EMITTER_DECL(EmitFunctionCall);
-                static void EMITTER_DECL(EmitReduce);
-                static void EMITTER_DECL(EmitSign);
-                static void EMITTER_DECL(EmitSlice);
-                static void EMITTER_DECL(EmitSum);
-                static void EMITTER_DECL(EmitExp);
-                static void EMITTER_DECL(EmitSin);
-                static void EMITTER_DECL(EmitSinh);
-                static void EMITTER_DECL(EmitCos);
-                static void EMITTER_DECL(EmitCosh);
-                static void EMITTER_DECL(EmitTan);
-                static void EMITTER_DECL(EmitTanh);
-                static void EMITTER_DECL(EmitAsin);
-                static void EMITTER_DECL(EmitAcos);
-                static void EMITTER_DECL(EmitAtan);
-                static void EMITTER_DECL(EmitPower);
-                static void EMITTER_DECL(EmitReplaceSlice);
-                static void EMITTER_DECL(EmitOneHot);
-                static void EMITTER_DECL(EmitFloor);
-                static void EMITTER_DECL(EmitCeiling);
-                static void EMITTER_DECL(EmitSqrt);
-                static void EMITTER_DECL(EmitConvolution);
-                static void EMITTER_DECL(EmitNot);
-                static void EMITTER_DECL(EmitMaxPool);
-                static void EMITTER_DECL(EmitReverse);
-                static void EMITTER_DECL(EmitReduceWindow);
-                static void EMITTER_DECL(EmitSelectAndScatter);
+                template <typename OP>
+                static void emit(GPU_ExternalFunction* external_function,
+                                 codegen::CodeWriter& writer,
+                                 const ngraph::Node* node,
+                                 const std::vector<GPU_TensorViewWrapper>& args,
+                                 const std::vector<GPU_TensorViewWrapper>& out)
+                {
+                    throw std::runtime_error("Unimplemented op in GPU emitter");
+                }
+
+                static void nop(GPU_ExternalFunction* external_function,
+                                codegen::CodeWriter& writer,
+                                const ngraph::Node* node,
+                                const std::vector<GPU_TensorViewWrapper>& args,
+                                const std::vector<GPU_TensorViewWrapper>& out)
+                {
+                }
+
+            private:
+                static std::string emit_vector(const GPU_TensorViewWrapper&,
+                                               const std::string& name = "");
+                static std::string emit_array1d(const GPU_TensorViewWrapper&,
+                                                const std::string& name = "");
+                static std::string emit_matrix(const GPU_TensorViewWrapper&,
+                                               const std::string& name = "");
             };
         }
     }
 }
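In the reworked header above, `EMITTER_DECL(op_name)` supplies everything after the return type, including the `emit<op_name>` token, which is why the definitions in gpu_emitter.cpp read `template <> void CPU_Emitter::EMITTER_DECL(ngraph::op::Abs)`. A self-contained sketch of that macro-expansion trick, using simplified stand-in types rather than the real nGraph signature:

```cpp
#include <iostream>
#include <string>

// Stand-ins for the real nGraph types (illustrative only).
struct Node
{
    std::string name = "Abs_0";
};
struct AbsOp
{
};
struct SinOp
{
};

// The macro expands to the function name, its template argument, and
// the shared parameter list, so every specialization can be declared
// with one token.
#define EMITTER_DECL(op_name) emit<op_name>(const Node* node)

struct Emitter
{
    // Primary template: the default for ops nobody has implemented yet.
    template <typename OP>
    static void emit(const Node* node)
    {
        std::cout << node->name << " is not implemented.\n";
    }
};

// EMITTER_DECL(AbsOp) expands to emit<AbsOp>(const Node* node).
template <>
void Emitter::EMITTER_DECL(AbsOp)
{
    std::cout << "{ // " << node->name << "\n}\n";
}

int main()
{
    Node n;
    Emitter::emit<AbsOp>(&n); // uses the specialization
    Emitter::emit<SinOp>(&n); // falls back to the primary template
    return 0;
}
```

The design choice here is that the macro keeps every specialization's signature in sync with the primary template; changing the parameter list later means editing one macro rather than every per-op definition.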
@@ -41,8 +41,11 @@
 #include "ngraph/ops/abs.hpp"
 #include "ngraph/ops/acos.hpp"
 #include "ngraph/ops/add.hpp"
+#include "ngraph/ops/allreduce.hpp"
 #include "ngraph/ops/asin.hpp"
 #include "ngraph/ops/atan.hpp"
+#include "ngraph/ops/avg_pool.hpp"
+#include "ngraph/ops/batch_norm.hpp"
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/ceiling.hpp"
 #include "ngraph/ops/concat.hpp"
@@ -57,24 +60,34 @@
 #include "ngraph/ops/exp.hpp"
 #include "ngraph/ops/floor.hpp"
 #include "ngraph/ops/function_call.hpp"
+#include "ngraph/ops/get_output_element.hpp"
 #include "ngraph/ops/greater.hpp"
 #include "ngraph/ops/greater_eq.hpp"
 #include "ngraph/ops/less.hpp"
 #include "ngraph/ops/less_eq.hpp"
 #include "ngraph/ops/log.hpp"
+#include "ngraph/ops/max.hpp"
 #include "ngraph/ops/max_pool.hpp"
 #include "ngraph/ops/maximum.hpp"
+#include "ngraph/ops/min.hpp"
 #include "ngraph/ops/minimum.hpp"
 #include "ngraph/ops/multiply.hpp"
 #include "ngraph/ops/negative.hpp"
 #include "ngraph/ops/not.hpp"
 #include "ngraph/ops/not_equal.hpp"
 #include "ngraph/ops/one_hot.hpp"
+#include "ngraph/ops/op.hpp"
+#include "ngraph/ops/pad.hpp"
+#include "ngraph/ops/parameter.hpp"
 #include "ngraph/ops/power.hpp"
+#include "ngraph/ops/product.hpp"
 #include "ngraph/ops/reduce.hpp"
 #include "ngraph/ops/reduce_window.hpp"
+#include "ngraph/ops/relu.hpp"
+#include "ngraph/ops/remainder.hpp"
 #include "ngraph/ops/replace_slice.hpp"
 #include "ngraph/ops/reshape.hpp"
+#include "ngraph/ops/result.hpp"
 #include "ngraph/ops/reverse.hpp"
 #include "ngraph/ops/select.hpp"
 #include "ngraph/ops/select_and_scatter.hpp"
@@ -82,6 +95,7 @@
 #include "ngraph/ops/sin.hpp"
 #include "ngraph/ops/sinh.hpp"
 #include "ngraph/ops/slice.hpp"
+#include "ngraph/ops/softmax.hpp"
 #include "ngraph/ops/sqrt.hpp"
 #include "ngraph/ops/subtract.hpp"
 #include "ngraph/ops/sum.hpp"
@@ -147,56 +161,77 @@ static StaticInitializers s_static_initializers;
 #define TI(x) type_index(typeid(x))
 
 static const runtime::gpu::OpMap dispatcher{
-    {TI(ngraph::op::Add), &runtime::gpu::GPU_Emitter::EmitAdd},
-    {TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::EmitDot},
-    {TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::EmitMultiply},
-    {TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::EmitNop},
-    {TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::EmitAbs},
-    {TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::EmitConcat},
-    {TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::EmitDivide},
-    {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::EmitEqual},
-    {TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::EmitGreater},
-    {TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::EmitGreaterEq},
-    {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::EmitLess},
-    {TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::EmitLessEq},
-    {TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::EmitLog},
-    {TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::EmitMaximum},
-    {TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::EmitMinimum},
-    {TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::EmitNegative},
-    {TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::EmitNotEqual},
-    {TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::EmitPower},
-    {TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::EmitSelect},
-    {TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::EmitSubtract},
-    {TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::EmitBroadcast},
-    {TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::EmitConvert},
-    {TI(ngraph::op::Constant), &runtime::gpu::GPU_Emitter::EmitConstant},
-    {TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::EmitReshape},
-    {TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::EmitFunctionCall},
-    {TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::EmitReduce},
-    {TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::EmitSign},
-    {TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::EmitSlice},
-    {TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::EmitSum},
-    {TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::EmitExp},
-    {TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::EmitSin},
-    {TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::EmitSinh},
-    {TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::EmitCos},
-    {TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::EmitCosh},
-    {TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::EmitTan},
-    {TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::EmitTanh},
-    {TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::EmitAsin},
-    {TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::EmitAcos},
-    {TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::EmitAtan},
-    {TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::EmitReplaceSlice},
-    {TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::EmitOneHot},
-    {TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::EmitFloor},
-    {TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::EmitCeiling},
-    {TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::EmitSqrt},
-    {TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::EmitConvolution},
-    {TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::EmitNot},
-    {TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::EmitMaxPool},
-    {TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::EmitReverse},
-    {TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::EmitReduceWindow},
-    {TI(ngraph::op::SelectAndScatter), &runtime::gpu::GPU_Emitter::EmitSelectAndScatter},
+    {TI(ngraph::op::Add), &runtime::cpu::GPU_Emitter::emit<op::Add>},
+    {TI(ngraph::op::MatmulBias), &runtime::cpu::GPU_Emitter::emit<op::MatmulBias>},
+    {TI(ngraph::op::Dot), &runtime::cpu::GPU_Emitter::emit<op::Dot>},
+    {TI(ngraph::op::Multiply), &runtime::cpu::GPU_Emitter::emit<op::Multiply>},
+    {TI(ngraph::op::Parameter), &runtime::cpu::GPU_Emitter::nop},
+    {TI(ngraph::op::Abs), &runtime::cpu::GPU_Emitter::emit<op::Abs>},
+    {TI(ngraph::op::Concat), &runtime::cpu::GPU_Emitter::emit<op::Concat>},
+    {TI(ngraph::op::Divide), &runtime::cpu::GPU_Emitter::emit<op::Divide>},
+    {TI(ngraph::op::Equal), &runtime::cpu::GPU_Emitter::emit<op::Equal>},
+    {TI(ngraph::op::GetOutputElement), &runtime::cpu::GPU_Emitter::emit<op::GetOutputElement>},
+    {TI(ngraph::op::Greater), &runtime::cpu::GPU_Emitter::emit<op::Greater>},
+    {TI(ngraph::op::GreaterEq), &runtime::cpu::GPU_Emitter::emit<op::GreaterEq>},
+    {TI(ngraph::op::Less), &runtime::cpu::GPU_Emitter::emit<op::Less>},
+    {TI(ngraph::op::LessEq), &runtime::cpu::GPU_Emitter::emit<op::LessEq>},
+    {TI(ngraph::op::Log), &runtime::cpu::GPU_Emitter::emit<op::Log>},
+    {TI(ngraph::op::Maximum), &runtime::cpu::GPU_Emitter::emit<op::Maximum>},
+    {TI(ngraph::op::Minimum), &runtime::cpu::GPU_Emitter::emit<op::Minimum>},
+    {TI(ngraph::op::Negative), &runtime::cpu::GPU_Emitter::emit<op::Negative>},
+    {TI(ngraph::op::NotEqual), &runtime::cpu::GPU_Emitter::emit<op::NotEqual>},
+    {TI(ngraph::op::Power), &runtime::cpu::GPU_Emitter::emit<op::Power>},
+    {TI(ngraph::op::Select), &runtime::cpu::GPU_Emitter::emit<op::Select>},
+    {TI(ngraph::op::Subtract), &runtime::cpu::GPU_Emitter::emit<op::Subtract>},
+    {TI(ngraph::op::Broadcast), &runtime::cpu::GPU_Emitter::emit<op::Broadcast>},
+    {TI(ngraph::op::Convert), &runtime::cpu::GPU_Emitter::emit<op::Convert>},
+    {TI(ngraph::op::Constant), &runtime::cpu::GPU_Emitter::emit<op::Constant>},
+    {TI(ngraph::op::Reshape), &runtime::cpu::GPU_Emitter::emit<op::Reshape>},
+    {TI(ngraph::op::FunctionCall), &runtime::cpu::GPU_Emitter::emit<op::FunctionCall>},
+    {TI(ngraph::op::Reduce), &runtime::cpu::GPU_Emitter::emit<op::Reduce>},
+    {TI(ngraph::op::Sign), &runtime::cpu::GPU_Emitter::emit<op::Sign>},
+    {TI(ngraph::op::Slice), &runtime::cpu::GPU_Emitter::emit<op::Slice>},
+    {TI(ngraph::op::Sum), &runtime::cpu::GPU_Emitter::emit<op::Sum>},
+    {TI(ngraph::op::Exp), &runtime::cpu::GPU_Emitter::emit<op::Exp>},
+    {TI(ngraph::op::Sin), &runtime::cpu::GPU_Emitter::emit<op::Sin>},
+    {TI(ngraph::op::Sinh), &runtime::cpu::GPU_Emitter::emit<op::Sinh>},
+    {TI(ngraph::op::Cos), &runtime::cpu::GPU_Emitter::emit<op::Cos>},
+    {TI(ngraph::op::Cosh), &runtime::cpu::GPU_Emitter::emit<op::Cosh>},
+    {TI(ngraph::op::Tan), &runtime::cpu::GPU_Emitter::emit<op::Tan>},
+    {TI(ngraph::op::Tanh), &runtime::cpu::GPU_Emitter::emit<op::Tanh>},
+    {TI(ngraph::op::Asin), &runtime::cpu::GPU_Emitter::emit<op::Asin>},
+    {TI(ngraph::op::Acos), &runtime::cpu::GPU_Emitter::emit<op::Acos>},
+    {TI(ngraph::op::Atan), &runtime::cpu::GPU_Emitter::emit<op::Atan>},
+    {TI(ngraph::op::ReplaceSlice), &runtime::cpu::GPU_Emitter::emit<op::ReplaceSlice>},
+    {TI(ngraph::op::OneHot), &runtime::cpu::GPU_Emitter::emit<op::OneHot>},
+    {TI(ngraph::op::Floor), &runtime::cpu::GPU_Emitter::emit<op::Floor>},
+    {TI(ngraph::op::Ceiling), &runtime::cpu::GPU_Emitter::emit<op::Ceiling>},
+    {TI(ngraph::op::Sqrt), &runtime::cpu::GPU_Emitter::emit<op::Sqrt>},
+    {TI(ngraph::op::Convolution), &runtime::cpu::GPU_Emitter::emit<op::Convolution>},
+    {TI(ngraph::op::ConvolutionBackpropFilters),
+     &runtime::cpu::GPU_Emitter::emit<op::ConvolutionBackpropFilters>},
+    {TI(ngraph::op::ConvolutionBackpropData),
+     &runtime::cpu::GPU_Emitter::emit<op::ConvolutionBackpropData>},
+    {TI(ngraph::runtime::cpu::op::ConvertLayout),
+     &runtime::cpu::GPU_Emitter::emit<runtime::cpu::op::ConvertLayout>},
+    {TI(ngraph::op::Not), &runtime::cpu::GPU_Emitter::emit<op::Not>},
+    {TI(ngraph::op::MaxPool), &runtime::cpu::GPU_Emitter::emit<op::MaxPool>},
+    {TI(ngraph::op::Reverse), &runtime::cpu::GPU_Emitter::emit<op::Reverse>},
+    {TI(ngraph::op::Result), &runtime::cpu::GPU_Emitter::emit<op::Result>},
+    {TI(ngraph::op::ReduceWindow), &runtime::cpu::GPU_Emitter::emit<op::ReduceWindow>},
+    {TI(ngraph::op::SelectAndScatter), &runtime::cpu::GPU_Emitter::emit<op::SelectAndScatter>},
+    {TI(ngraph::op::AvgPool), &runtime::cpu::GPU_Emitter::emit<op::AvgPool>},
+    {TI(ngraph::op::AvgPoolBackprop), &runtime::cpu::GPU_Emitter::emit<op::AvgPoolBackprop>},
+    {TI(ngraph::op::Pad), &runtime::cpu::GPU_Emitter::emit<op::Pad>},
+    {TI(ngraph::op::BatchNorm), &runtime::cpu::GPU_Emitter::emit<op::BatchNorm>},
+    {TI(ngraph::op::BatchNormBackprop), &runtime::cpu::GPU_Emitter::emit<op::BatchNormBackprop>},
+    {TI(ngraph::op::MaxPoolBackprop), &runtime::cpu::GPU_Emitter::emit<op::MaxPoolBackprop>},
+    {TI(ngraph::op::Product), &runtime::cpu::GPU_Emitter::emit<op::Product>},
+    {TI(ngraph::op::Max), &runtime::cpu::GPU_Emitter::emit<op::Max>},
+    {TI(ngraph::op::Min), &runtime::cpu::GPU_Emitter::emit<op::Min>},
+    {TI(ngraph::op::Relu), &runtime::cpu::GPU_Emitter::emit<op::Relu>},
+    {TI(ngraph::op::ReluBackprop), &runtime::cpu::GPU_Emitter::emit<op::ReluBackprop>},
+    {TI(ngraph::op::Softmax), &runtime::cpu::GPU_Emitter::emit<op::Softmax>},
 };
 
 runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
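One detail that makes the expanded dispatcher above workable: it can list ops that have no `emit<OP>` specialization yet (for example the backprop variants), because taking the address of `emit<OP>` merely instantiates the throwing primary template. The failure is deferred from compile/link time to the moment the op is actually emitted. A minimal runnable demo of that behavior (illustrative names, not nGraph code):

```cpp
#include <iostream>
#include <stdexcept>

struct Emitter
{
    // Primary template, mirroring the header's default emit<OP>.
    template <typename OP>
    static void emit()
    {
        throw std::runtime_error("Unimplemented op in GPU emitter");
    }
};

struct Softmax
{
}; // an op type with no emit<> specialization

int main()
{
    // Taking the address forces instantiation of the primary template,
    // so this compiles and links even though Softmax is unimplemented.
    void (*fn)() = &Emitter::emit<Softmax>;
    try
    {
        fn(); // the error surfaces only when the op is actually emitted
    }
    catch (const std::runtime_error& e)
    {
        std::cout << e.what() << "\n";
    }
    return 0;
}
```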