Commit 94cd8274 authored by fenglei.tian's avatar fenglei.tian

add template

parent 8eb31b78
......@@ -91,8 +91,8 @@
#include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/util.hpp"
......@@ -107,6 +107,7 @@ namespace ngraph
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Abs)
{
std::cout << "abs" << std::endl;
writer << "{ // " << node->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
......@@ -578,6 +579,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
writer << "}\n";
}
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Result)
{
}
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
{
......
......@@ -46,6 +46,7 @@ namespace ngraph
const std::vector<GPU_TensorViewWrapper>& args,
const std::vector<GPU_TensorViewWrapper>& out)
{
std::cout << node->get_name() << std::endl;
throw std::runtime_error("Unimplemented op in GPU emitter");
}
......@@ -55,6 +56,7 @@ namespace ngraph
const std::vector<GPU_TensorViewWrapper>& args,
const std::vector<GPU_TensorViewWrapper>& out)
{
std::cout << "nop" << std::endl;
}
private:
......
......@@ -160,91 +160,92 @@ static StaticInitializers s_static_initializers;
#define TI(x) type_index(typeid(x))
static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Add), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Add>},
{TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Dot>},
{TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Multiply>},
{TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::nop},
{TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Abs>},
{TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
{TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Divide>},
{TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>},
static const ngraph::runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Add), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Add>},
{TI(ngraph::op::Dot), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Dot>},
{TI(ngraph::op::Multiply), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Multiply>},
{TI(ngraph::op::Parameter), &ngraph::runtime::gpu::GPU_Emitter::nop},
{TI(ngraph::op::Abs), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Abs>},
{TI(ngraph::op::Concat), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
{TI(ngraph::op::Divide), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Divide>},
{TI(ngraph::op::Equal), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>},
{TI(ngraph::op::GetOutputElement),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
{TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>},
{TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>},
{TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>},
{TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::LessEq>},
{TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Log>},
{TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>},
{TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>},
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit<ngraph::op::NotEqual>},
{TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Power>},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>},
{TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Subtract>},
{TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
{TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>},
{TI(ngraph::op::Constant), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
{TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sign>},
{TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>},
{TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sum>},
{TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Exp>},
{TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sin>},
{TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sinh>},
{TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Cos>},
{TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Cosh>},
{TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Tan>},
{TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Tanh>},
{TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Asin>},
{TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Acos>},
{TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Atan>},
{TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>},
{TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Floor>},
{TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Ceiling>},
{TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sqrt>},
{TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
{TI(ngraph::op::Greater), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>},
{TI(ngraph::op::GreaterEq), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>},
{TI(ngraph::op::Less), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>},
{TI(ngraph::op::LessEq), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::LessEq>},
{TI(ngraph::op::Log), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Log>},
{TI(ngraph::op::Maximum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>},
{TI(ngraph::op::Minimum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>},
{TI(ngraph::op::Negative), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::NotEqual>},
{TI(ngraph::op::Power), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Power>},
{TI(ngraph::op::Select), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>},
{TI(ngraph::op::Subtract), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Subtract>},
{TI(ngraph::op::Broadcast), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
{TI(ngraph::op::Convert), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>},
{TI(ngraph::op::Constant), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
{TI(ngraph::op::Reshape), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sign>},
{TI(ngraph::op::Slice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>},
{TI(ngraph::op::Sum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sum>},
{TI(ngraph::op::Exp), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Exp>},
{TI(ngraph::op::Sin), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sin>},
{TI(ngraph::op::Sinh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sinh>},
{TI(ngraph::op::Cos), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Cos>},
{TI(ngraph::op::Cosh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Cosh>},
{TI(ngraph::op::Tan), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Tan>},
{TI(ngraph::op::Tanh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Tanh>},
{TI(ngraph::op::Asin), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Asin>},
{TI(ngraph::op::Acos), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Acos>},
{TI(ngraph::op::Atan), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Atan>},
{TI(ngraph::op::ReplaceSlice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::OneHot), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>},
{TI(ngraph::op::Floor), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Floor>},
{TI(ngraph::op::Ceiling), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Ceiling>},
{TI(ngraph::op::Sqrt), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sqrt>},
{TI(ngraph::op::Convolution), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropData>},
{TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Not>},
{TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>},
{TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
{TI(ngraph::op::Result), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
{TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropData>},
{TI(ngraph::op::Not), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Not>},
{TI(ngraph::op::MaxPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>},
{TI(ngraph::op::Reverse), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
{TI(ngraph::op::Result), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
{TI(ngraph::op::ReduceWindow), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
{TI(ngraph::op::SelectAndScatter),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Pad), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>},
{TI(ngraph::op::BatchNorm), &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Pad), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>},
{TI(ngraph::op::BatchNorm), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>},
{TI(ngraph::op::BatchNormBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
{TI(ngraph::op::MaxPoolBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
{TI(ngraph::op::Product), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>},
{TI(ngraph::op::Max), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
{TI(ngraph::op::Min), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
{TI(ngraph::op::Relu), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Softmax), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>},
&ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
{TI(ngraph::op::Product), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>},
{TI(ngraph::op::Max), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
{TI(ngraph::op::Min), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
{TI(ngraph::op::Relu), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Softmax), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>},
};
runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
ngraph::runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
const shared_ptr<ngraph::Function>& function, bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function)
, m_compiled_function(nullptr)
, m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr)
, m_function_name(function->get_name())
{
}
void runtime::gpu::GPU_ExternalFunction::compile()
void ngraph::runtime::gpu::GPU_ExternalFunction::compile()
{
if (m_is_compiled)
{
......@@ -540,7 +541,7 @@ using namespace std;
size_t temp_pool_size = current_function->get_temporary_pool_size();
writer << "// Allocate the memory pool\n";
// TODO memory pool malloc.
writer << "void* pool_base_ptr = runtime::gpu::create_gpu_buffer(" << temp_pool_size
writer << "void* pool_base_ptr = ngraph::runtime::gpu::create_gpu_buffer(" << temp_pool_size
<< ");\n";
// Add temporaries to the variable name map
......@@ -605,7 +606,7 @@ using namespace std;
if (tv == ptv)
{
parameter_as_output = true;
writer << "runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
writer << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
<< et.c_type_string() << "*>(outputs[" << output_index << "]), "
<< m_variable_name_map[ptv->get_tensor().get_name()] << ", "
<< ptv->get_tensor().size() << ");\n";
......@@ -617,7 +618,7 @@ using namespace std;
{
if (contains(constants, tv.get()))
{
writer << "runtime::gpu::cuda_memcpyHtD(outputs[" << output_index << "], "
writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index << "], "
<< tv->get_tensor().get_name() << ", " << tv->get_tensor().size()
<< ");\n";
}
......@@ -738,7 +739,7 @@ using namespace std;
}
}
void runtime::gpu::GPU_ExternalFunction::handle_output_alias(
void ngraph::runtime::gpu::GPU_ExternalFunction::handle_output_alias(
codegen::CodeWriter& writer,
const Node& node,
const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
......@@ -756,7 +757,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias(
writer.indent++;
for (size_t i = 1; i < outputs.size(); i++)
{
writer << "runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs["
writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs["
<< outputs[i] << "]), static_cast<void*>(outputs[" << outputs[0]
<< "]), " << otv->get_tensor().size() << ");\n";
}
......@@ -767,7 +768,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias(
}
}
shared_ptr<ngraph::runtime::CallFrame> runtime::gpu::GPU_ExternalFunction::make_call_frame()
shared_ptr<ngraph::runtime::CallFrame> ngraph::runtime::gpu::GPU_ExternalFunction::make_call_frame()
{
if (!m_is_compiled)
{
......@@ -778,7 +779,7 @@ shared_ptr<ngraph::runtime::CallFrame> runtime::gpu::GPU_ExternalFunction::make_
m_compiled_function);
}
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(
void ngraph::runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(
codegen::CodeWriter& writer,
Node* node,
const std::vector<GPU_TensorViewWrapper>& in,
......@@ -787,7 +788,7 @@ void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(
writer << "timer_" << node->get_name() << ".start();\n";
}
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_exit(
void ngraph::runtime::gpu::GPU_ExternalFunction::emit_debug_function_exit(
codegen::CodeWriter& writer,
Node* node,
const std::vector<GPU_TensorViewWrapper>& in,
......
......@@ -82,6 +82,7 @@ namespace ngraph
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::string m_function_name;
};
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment