Commit 94cd8274 authored by fenglei.tian's avatar fenglei.tian

add template

parent 8eb31b78
...@@ -91,8 +91,8 @@ ...@@ -91,8 +91,8 @@
#include "ngraph/ops/sum.hpp" #include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp" #include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp" #include "ngraph/ops/tanh.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp" #include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp" #include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
...@@ -106,7 +106,8 @@ namespace ngraph ...@@ -106,7 +106,8 @@ namespace ngraph
{ {
template <> template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Abs) void GPU_Emitter::EMITTER_DECL(ngraph::op::Abs)
{ {
std::cout << "abs" << std::endl;
writer << "{ // " << node->get_name() << "\n"; writer << "{ // " << node->get_name() << "\n";
writer.indent++; writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n"; writer << "int count = " << out[0].get_size() << ";\n";
...@@ -467,7 +468,7 @@ cudnnSetOpTensorDescriptor(opTensorDesc, ...@@ -467,7 +468,7 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
void GPU_Emitter::EMITTER_DECL(ngraph::op::Constant) void GPU_Emitter::EMITTER_DECL(ngraph::op::Constant)
{ {
} }
template <> template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Reshape) void GPU_Emitter::EMITTER_DECL(ngraph::op::Reshape)
{ {
...@@ -577,7 +578,11 @@ cudnnSetOpTensorDescriptor(opTensorDesc, ...@@ -577,7 +578,11 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Result)
{
}
template <> template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt) void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
{ {
......
...@@ -45,7 +45,8 @@ namespace ngraph ...@@ -45,7 +45,8 @@ namespace ngraph
const ngraph::Node* node, const ngraph::Node* node,
const std::vector<GPU_TensorViewWrapper>& args, const std::vector<GPU_TensorViewWrapper>& args,
const std::vector<GPU_TensorViewWrapper>& out) const std::vector<GPU_TensorViewWrapper>& out)
{ {
std::cout << node->get_name() << std::endl;
throw std::runtime_error("Unimplemented op in GPU emitter"); throw std::runtime_error("Unimplemented op in GPU emitter");
} }
...@@ -55,6 +56,7 @@ namespace ngraph ...@@ -55,6 +56,7 @@ namespace ngraph
const std::vector<GPU_TensorViewWrapper>& args, const std::vector<GPU_TensorViewWrapper>& args,
const std::vector<GPU_TensorViewWrapper>& out) const std::vector<GPU_TensorViewWrapper>& out)
{ {
std::cout << "nop" << std::endl;
} }
private: private:
......
...@@ -160,91 +160,92 @@ static StaticInitializers s_static_initializers; ...@@ -160,91 +160,92 @@ static StaticInitializers s_static_initializers;
#define TI(x) type_index(typeid(x)) #define TI(x) type_index(typeid(x))
static const runtime::gpu::OpMap dispatcher{ static const ngraph::runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Add), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Add>}, {TI(ngraph::op::Add), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Add>},
{TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Dot>}, {TI(ngraph::op::Dot), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Dot>},
{TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Multiply>}, {TI(ngraph::op::Multiply), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Multiply>},
{TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::nop}, {TI(ngraph::op::Parameter), &ngraph::runtime::gpu::GPU_Emitter::nop},
{TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Abs>}, {TI(ngraph::op::Abs), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Abs>},
{TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>}, {TI(ngraph::op::Concat), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
{TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Divide>}, {TI(ngraph::op::Divide), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Divide>},
{TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>}, {TI(ngraph::op::Equal), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>},
{TI(ngraph::op::GetOutputElement), {TI(ngraph::op::GetOutputElement),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
{TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>}, {TI(ngraph::op::Greater), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>},
{TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>}, {TI(ngraph::op::GreaterEq), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>},
{TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>}, {TI(ngraph::op::Less), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>},
{TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::LessEq>}, {TI(ngraph::op::LessEq), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::LessEq>},
{TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Log>}, {TI(ngraph::op::Log), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Log>},
{TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>}, {TI(ngraph::op::Maximum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>},
{TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>}, {TI(ngraph::op::Minimum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>},
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>}, {TI(ngraph::op::Negative), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit<ngraph::op::NotEqual>}, {TI(ngraph::op::NotEqual), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::NotEqual>},
{TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Power>}, {TI(ngraph::op::Power), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Power>},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>}, {TI(ngraph::op::Select), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>},
{TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Subtract>}, {TI(ngraph::op::Subtract), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Subtract>},
{TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>}, {TI(ngraph::op::Broadcast), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
{TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>}, {TI(ngraph::op::Convert), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>},
{TI(ngraph::op::Constant), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>}, {TI(ngraph::op::Constant), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
{TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>}, {TI(ngraph::op::Reshape), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>}, {TI(ngraph::op::FunctionCall), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>}, {TI(ngraph::op::Reduce), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sign>}, {TI(ngraph::op::Sign), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sign>},
{TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>}, {TI(ngraph::op::Slice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>},
{TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sum>}, {TI(ngraph::op::Sum), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sum>},
{TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Exp>}, {TI(ngraph::op::Exp), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Exp>},
{TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sin>}, {TI(ngraph::op::Sin), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sin>},
{TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sinh>}, {TI(ngraph::op::Sinh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sinh>},
{TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Cos>}, {TI(ngraph::op::Cos), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Cos>},
{TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Cosh>}, {TI(ngraph::op::Cosh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Cosh>},
{TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Tan>}, {TI(ngraph::op::Tan), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Tan>},
{TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Tanh>}, {TI(ngraph::op::Tanh), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Tanh>},
{TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Asin>}, {TI(ngraph::op::Asin), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Asin>},
{TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Acos>}, {TI(ngraph::op::Acos), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Acos>},
{TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Atan>}, {TI(ngraph::op::Atan), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Atan>},
{TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>}, {TI(ngraph::op::ReplaceSlice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>}, {TI(ngraph::op::OneHot), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>},
{TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Floor>}, {TI(ngraph::op::Floor), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Floor>},
{TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Ceiling>}, {TI(ngraph::op::Ceiling), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Ceiling>},
{TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sqrt>}, {TI(ngraph::op::Sqrt), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sqrt>},
{TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>}, {TI(ngraph::op::Convolution), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropFilters), {TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropData>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropData>},
{TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Not>}, {TI(ngraph::op::Not), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Not>},
{TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>}, {TI(ngraph::op::MaxPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>},
{TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>}, {TI(ngraph::op::Reverse), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
{TI(ngraph::op::Result), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>}, {TI(ngraph::op::Result), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
{TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>}, {TI(ngraph::op::ReduceWindow), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
{TI(ngraph::op::SelectAndScatter), {TI(ngraph::op::SelectAndScatter),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>}, {TI(ngraph::op::AvgPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Pad), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>}, {TI(ngraph::op::Pad), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>},
{TI(ngraph::op::BatchNorm), &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>}, {TI(ngraph::op::BatchNorm), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>},
{TI(ngraph::op::BatchNormBackprop), {TI(ngraph::op::BatchNormBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
{TI(ngraph::op::MaxPoolBackprop), {TI(ngraph::op::MaxPoolBackprop),
&runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>}, &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
{TI(ngraph::op::Product), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>}, {TI(ngraph::op::Product), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>},
{TI(ngraph::op::Max), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>}, {TI(ngraph::op::Max), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
{TI(ngraph::op::Min), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>}, {TI(ngraph::op::Min), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
{TI(ngraph::op::Relu), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Relu>}, {TI(ngraph::op::Relu), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>}, {TI(ngraph::op::ReluBackprop), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Softmax), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>}, {TI(ngraph::op::Softmax), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>},
}; };
runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction( ngraph::runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
const shared_ptr<ngraph::Function>& function, bool release_function) const shared_ptr<ngraph::Function>& function, bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function) : ngraph::runtime::ExternalFunction(function, release_function)
, m_compiled_function(nullptr) , m_compiled_function(nullptr)
, m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr) , m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr)
, m_function_name(function->get_name())
{ {
} }
void runtime::gpu::GPU_ExternalFunction::compile() void ngraph::runtime::gpu::GPU_ExternalFunction::compile()
{ {
if (m_is_compiled) if (m_is_compiled)
{ {
...@@ -540,7 +541,7 @@ using namespace std; ...@@ -540,7 +541,7 @@ using namespace std;
size_t temp_pool_size = current_function->get_temporary_pool_size(); size_t temp_pool_size = current_function->get_temporary_pool_size();
writer << "// Allocate the memory pool\n"; writer << "// Allocate the memory pool\n";
// TODO memory pool malloc. // TODO memory pool malloc.
writer << "void* pool_base_ptr = runtime::gpu::create_gpu_buffer(" << temp_pool_size writer << "void* pool_base_ptr = ngraph::runtime::gpu::create_gpu_buffer(" << temp_pool_size
<< ");\n"; << ");\n";
// Add temporaries to the variable name map // Add temporaries to the variable name map
...@@ -605,7 +606,7 @@ using namespace std; ...@@ -605,7 +606,7 @@ using namespace std;
if (tv == ptv) if (tv == ptv)
{ {
parameter_as_output = true; parameter_as_output = true;
writer << "runtime::gpu::cuda_memcpyDtD(reinterpret_cast<" writer << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
<< et.c_type_string() << "*>(outputs[" << output_index << "]), " << et.c_type_string() << "*>(outputs[" << output_index << "]), "
<< m_variable_name_map[ptv->get_tensor().get_name()] << ", " << m_variable_name_map[ptv->get_tensor().get_name()] << ", "
<< ptv->get_tensor().size() << ");\n"; << ptv->get_tensor().size() << ");\n";
...@@ -617,7 +618,7 @@ using namespace std; ...@@ -617,7 +618,7 @@ using namespace std;
{ {
if (contains(constants, tv.get())) if (contains(constants, tv.get()))
{ {
writer << "runtime::gpu::cuda_memcpyHtD(outputs[" << output_index << "], " writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index << "], "
<< tv->get_tensor().get_name() << ", " << tv->get_tensor().size() << tv->get_tensor().get_name() << ", " << tv->get_tensor().size()
<< ");\n"; << ");\n";
} }
...@@ -738,7 +739,7 @@ using namespace std; ...@@ -738,7 +739,7 @@ using namespace std;
} }
} }
void runtime::gpu::GPU_ExternalFunction::handle_output_alias( void ngraph::runtime::gpu::GPU_ExternalFunction::handle_output_alias(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
const Node& node, const Node& node,
const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map) const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
...@@ -756,7 +757,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias( ...@@ -756,7 +757,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias(
writer.indent++; writer.indent++;
for (size_t i = 1; i < outputs.size(); i++) for (size_t i = 1; i < outputs.size(); i++)
{ {
writer << "runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs[" writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs["
<< outputs[i] << "]), static_cast<void*>(outputs[" << outputs[0] << outputs[i] << "]), static_cast<void*>(outputs[" << outputs[0]
<< "]), " << otv->get_tensor().size() << ");\n"; << "]), " << otv->get_tensor().size() << ");\n";
} }
...@@ -767,7 +768,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias( ...@@ -767,7 +768,7 @@ void runtime::gpu::GPU_ExternalFunction::handle_output_alias(
} }
} }
shared_ptr<ngraph::runtime::CallFrame> runtime::gpu::GPU_ExternalFunction::make_call_frame() shared_ptr<ngraph::runtime::CallFrame> ngraph::runtime::gpu::GPU_ExternalFunction::make_call_frame()
{ {
if (!m_is_compiled) if (!m_is_compiled)
{ {
...@@ -778,7 +779,7 @@ shared_ptr<ngraph::runtime::CallFrame> runtime::gpu::GPU_ExternalFunction::make_ ...@@ -778,7 +779,7 @@ shared_ptr<ngraph::runtime::CallFrame> runtime::gpu::GPU_ExternalFunction::make_
m_compiled_function); m_compiled_function);
} }
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry( void ngraph::runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
Node* node, Node* node,
const std::vector<GPU_TensorViewWrapper>& in, const std::vector<GPU_TensorViewWrapper>& in,
...@@ -787,7 +788,7 @@ void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry( ...@@ -787,7 +788,7 @@ void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(
writer << "timer_" << node->get_name() << ".start();\n"; writer << "timer_" << node->get_name() << ".start();\n";
} }
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_exit( void ngraph::runtime::gpu::GPU_ExternalFunction::emit_debug_function_exit(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
Node* node, Node* node,
const std::vector<GPU_TensorViewWrapper>& in, const std::vector<GPU_TensorViewWrapper>& in,
......
...@@ -82,6 +82,7 @@ namespace ngraph ...@@ -82,6 +82,7 @@ namespace ngraph
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine; std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing; bool m_emit_timing;
std::unordered_map<std::string, std::string> m_variable_name_map; std::unordered_map<std::string, std::string> m_variable_name_map;
std::string m_function_name;
}; };
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment