Commit b5414ba5 authored by fenglei.tian's avatar fenglei.tian

clang format

:
parent 6204a154
...@@ -165,7 +165,7 @@ namespace ngraph ...@@ -165,7 +165,7 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
static const OpMap dispatcher{ static const OpMap dispatcher{
{TI(ngraph::op::Add), &GPU_Emitter::emit<ngraph::op::Add>}, {TI(ngraph::op::Add), &GPU_Emitter::emit<ngraph::op::Add>},
{TI(ngraph::op::Dot), &GPU_Emitter::emit<ngraph::op::Dot>}, {TI(ngraph::op::Dot), &GPU_Emitter::emit<ngraph::op::Dot>},
{TI(ngraph::op::Multiply), &GPU_Emitter::emit<ngraph::op::Multiply>}, {TI(ngraph::op::Multiply), &GPU_Emitter::emit<ngraph::op::Multiply>},
...@@ -192,8 +192,7 @@ static const OpMap dispatcher{ ...@@ -192,8 +192,7 @@ static const OpMap dispatcher{
{TI(ngraph::op::Convert), &GPU_Emitter::emit<ngraph::op::Convert>}, {TI(ngraph::op::Convert), &GPU_Emitter::emit<ngraph::op::Convert>},
{TI(ngraph::op::Constant), &GPU_Emitter::emit<ngraph::op::Constant>}, {TI(ngraph::op::Constant), &GPU_Emitter::emit<ngraph::op::Constant>},
{TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>}, {TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), {TI(ngraph::op::FunctionCall), &GPU_Emitter::emit<ngraph::op::FunctionCall>},
&GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>}, {TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Sign), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>}, {TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>},
...@@ -208,14 +207,12 @@ static const OpMap dispatcher{ ...@@ -208,14 +207,12 @@ static const OpMap dispatcher{
{TI(ngraph::op::Asin), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Asin), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Acos), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Acos), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Atan), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Atan), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::ReplaceSlice), {TI(ngraph::op::ReplaceSlice), &GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
&GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
{TI(ngraph::op::OneHot), &GPU_Emitter::emit<ngraph::op::OneHot>}, {TI(ngraph::op::OneHot), &GPU_Emitter::emit<ngraph::op::OneHot>},
{TI(ngraph::op::Floor), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Floor), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Ceiling), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Ceiling), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Sqrt), &GPU_Emitter::emit<ngraph::op::Sqrt>}, {TI(ngraph::op::Sqrt), &GPU_Emitter::emit<ngraph::op::Sqrt>},
{TI(ngraph::op::Convolution), {TI(ngraph::op::Convolution), &GPU_Emitter::emit<ngraph::op::Convolution>},
&GPU_Emitter::emit<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropFilters), {TI(ngraph::op::ConvolutionBackpropFilters),
&GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>}, &GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
...@@ -224,50 +221,48 @@ static const OpMap dispatcher{ ...@@ -224,50 +221,48 @@ static const OpMap dispatcher{
{TI(ngraph::op::MaxPool), &GPU_Emitter::emit<ngraph::op::MaxPool>}, {TI(ngraph::op::MaxPool), &GPU_Emitter::emit<ngraph::op::MaxPool>},
{TI(ngraph::op::Reverse), &GPU_Emitter::emit<ngraph::op::Reverse>}, {TI(ngraph::op::Reverse), &GPU_Emitter::emit<ngraph::op::Reverse>},
{TI(ngraph::op::Result), &GPU_Emitter::emit<ngraph::op::Result>}, {TI(ngraph::op::Result), &GPU_Emitter::emit<ngraph::op::Result>},
{TI(ngraph::op::ReduceWindow), {TI(ngraph::op::ReduceWindow), &GPU_Emitter::emit<ngraph::op::ReduceWindow>},
&GPU_Emitter::emit<ngraph::op::ReduceWindow>},
{TI(ngraph::op::SelectAndScatter), {TI(ngraph::op::SelectAndScatter),
&GPU_Emitter::emit<ngraph::op::SelectAndScatter>}, &GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &GPU_Emitter::emit<ngraph::op::AvgPool>}, {TI(ngraph::op::AvgPool), &GPU_Emitter::emit<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop), &GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
&GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Pad), &GPU_Emitter::emit<ngraph::op::Pad>}, {TI(ngraph::op::Pad), &GPU_Emitter::emit<ngraph::op::Pad>},
{TI(ngraph::op::BatchNorm), &GPU_Emitter::emit<ngraph::op::BatchNorm>}, {TI(ngraph::op::BatchNorm), &GPU_Emitter::emit<ngraph::op::BatchNorm>},
{TI(ngraph::op::BatchNormBackprop), {TI(ngraph::op::BatchNormBackprop),
&GPU_Emitter::emit<ngraph::op::BatchNormBackprop>}, &GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
{TI(ngraph::op::MaxPoolBackprop), {TI(ngraph::op::MaxPoolBackprop), &GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
&GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
{TI(ngraph::op::Product), &GPU_Emitter::emit<ngraph::op::Product>}, {TI(ngraph::op::Product), &GPU_Emitter::emit<ngraph::op::Product>},
{TI(ngraph::op::Max), &GPU_Emitter::emit<ngraph::op::Max>}, {TI(ngraph::op::Max), &GPU_Emitter::emit<ngraph::op::Max>},
{TI(ngraph::op::Min), &GPU_Emitter::emit<ngraph::op::Min>}, {TI(ngraph::op::Min), &GPU_Emitter::emit<ngraph::op::Min>},
{TI(ngraph::op::Relu), &GPU_Emitter::emit<ngraph::op::Relu>}, {TI(ngraph::op::Relu), &GPU_Emitter::emit<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop), {TI(ngraph::op::ReluBackprop), &GPU_Emitter::emit<ngraph::op::ReluBackprop>},
&GPU_Emitter::emit<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Softmax), &GPU_Emitter::emit<ngraph::op::Softmax>}, {TI(ngraph::op::Softmax), &GPU_Emitter::emit<ngraph::op::Softmax>},
}; };
GPU_ExternalFunction::GPU_ExternalFunction( GPU_ExternalFunction::GPU_ExternalFunction(const shared_ptr<ngraph::Function>& function,
const shared_ptr<ngraph::Function>& function, bool release_function) bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function) : ngraph::runtime::ExternalFunction(function, release_function)
, m_compiled_function(nullptr) , m_compiled_function(nullptr)
, m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr) , m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr)
{ {
} }
void GPU_ExternalFunction::compile() void GPU_ExternalFunction::compile()
{ {
if (m_is_compiled) if (m_is_compiled)
{ {
return; return;
} }
string function_name = m_function->get_name(); string function_name = m_function->get_name();
string dump_filename = file_util::path_join(s_output_dir, function_name + "_ops.txt"); string dump_filename =
file_util::path_join(s_output_dir, function_name + "_ops.txt");
pass::Manager pass_manager; pass::Manager pass_manager;
// pass_manager.register_pass<pass::TopologicalSort>(); // pass_manager.register_pass<pass::TopologicalSort>();
// For now, just make everyone row-major. // For now, just make everyone row-major.
pass_manager.register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>(); pass_manager
.register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>(); pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>(64); pass_manager.register_pass<pass::MemoryLayout>(64);
pass_manager.register_pass<pass::DumpSorted>(dump_filename); pass_manager.register_pass<pass::DumpSorted>(dump_filename);
...@@ -313,7 +308,8 @@ using namespace std; ...@@ -313,7 +308,8 @@ using namespace std;
{ {
writer << "// Declare debug timers\n"; writer << "// Declare debug timers\n";
vector<string> names; vector<string> names;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function :
pass_manager.get_state().get_functions())
{ {
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
...@@ -327,8 +323,8 @@ using namespace std; ...@@ -327,8 +323,8 @@ using namespace std;
{ {
writer << "ngraph::stopwatch timer_" << s << ";\n"; writer << "ngraph::stopwatch timer_" << s << ";\n";
} }
writer << "extern \"C\" size_t get_debug_timer_count() { return " << names.size() writer << "extern \"C\" size_t get_debug_timer_count() { return "
<< "; }\n"; << names.size() << "; }\n";
writer << "extern \"C\" const char* get_debug_timer_name(size_t index)\n"; writer << "extern \"C\" const char* get_debug_timer_name(size_t index)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -344,7 +340,8 @@ using namespace std; ...@@ -344,7 +340,8 @@ using namespace std;
writer << "return rc;\n"; writer << "return rc;\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
writer << "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n"; writer
<< "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << "size_t rc;\n"; writer << "size_t rc;\n";
...@@ -360,7 +357,8 @@ using namespace std; ...@@ -360,7 +357,8 @@ using namespace std;
writer << "return rc;\n"; writer << "return rc;\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
writer << "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n"; writer
<< "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << "size_t rc;\n"; writer << "size_t rc;\n";
...@@ -368,7 +366,8 @@ using namespace std; ...@@ -368,7 +366,8 @@ using namespace std;
writer << "{\n"; writer << "{\n";
for (size_t i = 0; i < names.size(); i++) for (size_t i = 0; i < names.size(); i++)
{ {
writer << "case " << i << ": rc = timer_" << names[i] << ".get_call_count(); break;\n"; writer << "case " << i << ": rc = timer_" << names[i]
<< ".get_call_count(); break;\n";
} }
writer << "default: rc = 0;\n"; writer << "default: rc = 0;\n";
writer << "}\n"; writer << "}\n";
...@@ -384,26 +383,31 @@ using namespace std; ...@@ -384,26 +383,31 @@ using namespace std;
writer << "void *__dso_handle = 0;\n\n"; writer << "void *__dso_handle = 0;\n\n";
writer << "// Declare all constants\n"; writer << "// Declare all constants\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function :
pass_manager.get_state().get_functions())
{ {
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get()); const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c) if (c)
{ {
shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view(); shared_ptr<descriptor::TensorView> tv =
node->get_outputs()[0].get_tensor_view();
auto c_value_strings = c->get_value_strings(); auto c_value_strings = c->get_value_strings();
writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " " writer << "static "
<< tv->get_tensor().get_name() << "_cpu[" << c_value_strings.size() << tv->get_tensor().get_element_type().c_type_string() << " "
<< "] =\n"; << tv->get_tensor().get_name() << "_cpu["
<< c_value_strings.size() << "] =\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << emit_string_array(c_value_strings, 100 - writer.indent * 4); writer << emit_string_array(c_value_strings, 100 - writer.indent * 4);
writer.indent--; writer.indent--;
writer << "\n};\n\n"; writer << "\n};\n\n";
writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " *" writer << "static "
<< tv->get_tensor().get_element_type().c_type_string() << " *"
<< tv->get_tensor().get_name() << ";\n"; << tv->get_tensor().get_name() << ";\n";
m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name(); m_variable_name_map[tv->get_tensor().get_name()] =
tv->get_tensor().get_name();
} }
} }
} }
...@@ -411,7 +415,8 @@ using namespace std; ...@@ -411,7 +415,8 @@ using namespace std;
writer << "// Declare all functions\n"; writer << "// Declare all functions\n";
for (shared_ptr<Function> f : pass_manager.get_state().get_functions()) for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{ {
writer << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs, " writer << "extern \"C\" void " << f->get_name()
<< "(void** inputs, void** outputs, "
"cublasHandle_t& cublas_handle, " "cublasHandle_t& cublas_handle, "
"cudnnHandle_t& cudnn_handle);\n"; "cudnnHandle_t& cudnn_handle);\n";
} }
...@@ -419,7 +424,8 @@ using namespace std; ...@@ -419,7 +424,8 @@ using namespace std;
writer << "\n"; writer << "\n";
unordered_map<Node*, string> match_functions; unordered_map<Node*, string> match_functions;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function :
pass_manager.get_state().get_functions())
{ {
bool temporaries_used = false; bool temporaries_used = false;
size_t worst_case_tmp_size = 0; size_t worst_case_tmp_size = 0;
...@@ -512,7 +518,8 @@ using namespace std; ...@@ -512,7 +518,8 @@ using namespace std;
} }
} }
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function :
pass_manager.get_state().get_functions())
{ {
set<string> output_names; set<string> output_names;
for (shared_ptr<Node> op : current_function->get_results()) for (shared_ptr<Node> op : current_function->get_results())
...@@ -525,18 +532,21 @@ using namespace std; ...@@ -525,18 +532,21 @@ using namespace std;
{ {
if (dynamic_cast<ngraph::op::Constant*>(node.get())) if (dynamic_cast<ngraph::op::Constant*>(node.get()))
{ {
shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view(); shared_ptr<descriptor::TensorView> tv =
node->get_outputs()[0].get_tensor_view();
constants.insert(tv.get()); constants.insert(tv.get());
} }
} }
writer << "extern \"C\" void " << current_function->get_name(); writer << "extern \"C\" void " << current_function->get_name();
writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, cudnnHandle_t& " writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, "
"cudnnHandle_t& "
"cudnn_handle)\n"; "cudnn_handle)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function :
pass_manager.get_state().get_functions())
{ {
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
...@@ -548,9 +558,10 @@ using namespace std; ...@@ -548,9 +558,10 @@ using namespace std;
writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n"; writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << "runtime::gpu::cuda_memcpyHtD(" << tv->get_tensor().get_name() << ", " writer << "runtime::gpu::cuda_memcpyHtD("
<< tv->get_tensor().get_name() << "_cpu, " << tv->get_tensor().size() << tv->get_tensor().get_name() << ", "
<< ");\n"; << tv->get_tensor().get_name() << "_cpu, "
<< tv->get_tensor().size() << ");\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
...@@ -584,7 +595,8 @@ using namespace std; ...@@ -584,7 +595,8 @@ using namespace std;
{ {
stringstream ss; stringstream ss;
ss << "((" << tensor->get_element_type().c_type_string() ss << "((" << tensor->get_element_type().c_type_string()
<< "*)((char *)pool_base_ptr + " << tensor->get_pool_offset() << "))"; << "*)((char *)pool_base_ptr + " << tensor->get_pool_offset()
<< "))";
m_variable_name_map[tensor->get_name()] = ss.str(); m_variable_name_map[tensor->get_name()] = ss.str();
} }
} }
...@@ -592,12 +604,15 @@ using namespace std; ...@@ -592,12 +604,15 @@ using namespace std;
// Add inputs to the variable name map // Add inputs to the variable name map
size_t arg_index = 0; size_t arg_index = 0;
for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters()) for (shared_ptr<ngraph::op::Parameter> param :
current_function->get_parameters())
{ {
for (size_t i = 0; i < param->get_output_size(); ++i) for (size_t i = 0; i < param->get_output_size(); ++i)
{ {
shared_ptr<descriptor::TensorView> tv = param->get_output_tensor_view(i); shared_ptr<descriptor::TensorView> tv =
const element::Type& et = tv->get_tensor_view_type()->get_element_type(); param->get_output_tensor_view(i);
const element::Type& et =
tv->get_tensor_view_type()->get_element_type();
string type = et.c_type_string(); string type = et.c_type_string();
stringstream ss; stringstream ss;
ss << "((" << type << "*)(inputs[" << arg_index << "]))"; ss << "((" << type << "*)(inputs[" << arg_index << "]))";
...@@ -631,7 +646,8 @@ using namespace std; ...@@ -631,7 +646,8 @@ using namespace std;
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view(); shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type(); const element::Type& et = tv->get_tensor_view_type()->get_element_type();
bool parameter_as_output = false; bool parameter_as_output = false;
for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters()) for (shared_ptr<ngraph::op::Parameter> param :
current_function->get_parameters())
{ {
for (const descriptor::Output& pout : param->get_outputs()) for (const descriptor::Output& pout : param->get_outputs())
{ {
...@@ -639,8 +655,10 @@ using namespace std; ...@@ -639,8 +655,10 @@ using namespace std;
if (tv == ptv) if (tv == ptv)
{ {
parameter_as_output = true; parameter_as_output = true;
writer << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<" writer
<< et.c_type_string() << "*>(outputs[" << output_index << "]), " << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
<< et.c_type_string() << "*>(outputs[" << output_index
<< "]), "
<< m_variable_name_map[ptv->get_tensor().get_name()] << ", " << m_variable_name_map[ptv->get_tensor().get_name()] << ", "
<< ptv->get_tensor().size() << ");\n"; << ptv->get_tensor().size() << ");\n";
break; break;
...@@ -651,9 +669,9 @@ using namespace std; ...@@ -651,9 +669,9 @@ using namespace std;
{ {
if (contains(constants, tv.get())) if (contains(constants, tv.get()))
{ {
writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs["
<< "], " << tv->get_tensor().get_name() << ", " << output_index << "], " << tv->get_tensor().get_name()
<< tv->get_tensor().size() << ");\n"; << ", " << tv->get_tensor().size() << ");\n";
} }
else else
{ {
...@@ -668,27 +686,29 @@ using namespace std; ...@@ -668,27 +686,29 @@ using namespace std;
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects auto& n =
*node; // Work around a compiler warning (*node inside typeid may have effects
// with shared pointers, which is fine here but clang doesn't like it.) // with shared pointers, which is fine here but clang doesn't like it.)
auto handler = dispatcher.find(type_index(typeid(n))); auto handler = dispatcher.find(type_index(typeid(n)));
if (handler == dispatcher.end()) if (handler == dispatcher.end())
{ {
throw ngraph_error("Unhandled op during code generation : " + node->description()); throw ngraph_error("Unhandled op during code generation : " +
node->description());
} }
vector<GPU_TensorViewWrapper> in; vector<GPU_TensorViewWrapper> in;
for (const descriptor::Input& input : node->get_inputs()) for (const descriptor::Input& input : node->get_inputs())
{ {
const descriptor::Output& output = input.get_output(); const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view(); shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
in.push_back( in.push_back(GPU_TensorViewWrapper(
GPU_TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()])); tv, m_variable_name_map[tv->get_tensor().get_name()]));
} }
vector<GPU_TensorViewWrapper> out; vector<GPU_TensorViewWrapper> out;
for (const descriptor::Output& output : node->get_outputs()) for (const descriptor::Output& output : node->get_outputs())
{ {
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view(); shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
out.push_back( out.push_back(GPU_TensorViewWrapper(
GPU_TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()])); tv, m_variable_name_map[tv->get_tensor().get_name()]));
} }
// Emit operation prologue // Emit operation prologue
...@@ -742,7 +762,8 @@ using namespace std; ...@@ -742,7 +762,8 @@ using namespace std;
// TODO: Cleanup and make this a utility function // TODO: Cleanup and make this a utility function
file_util::make_directory(s_output_dir); file_util::make_directory(s_output_dir);
string filename = file_util::path_join(s_output_dir, function_name + "_codegen.cpp"); string filename =
file_util::path_join(s_output_dir, function_name + "_codegen.cpp");
ofstream out(filename); ofstream out(filename);
string code = writer.get_code(); string code = writer.get_code();
out << code; out << code;
...@@ -761,7 +782,8 @@ using namespace std; ...@@ -761,7 +782,8 @@ using namespace std;
} }
m_execution_engine->add_module(codegen_module); m_execution_engine->add_module(codegen_module);
m_execution_engine->finalize(); m_execution_engine->finalize();
m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(function_name); m_compiled_function =
m_execution_engine->find_function<EntryPoint_t>(function_name);
assert(m_compiled_function); assert(m_compiled_function);
m_is_compiled = true; m_is_compiled = true;
...@@ -769,13 +791,13 @@ using namespace std; ...@@ -769,13 +791,13 @@ using namespace std;
{ {
release_function(); release_function();
} }
} }
void GPU_ExternalFunction::handle_output_alias( void GPU_ExternalFunction::handle_output_alias(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
const Node& node, const Node& node,
const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map) const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
{ {
for (const descriptor::Output& output : node.get_outputs()) for (const descriptor::Output& output : node.get_outputs())
{ {
shared_ptr<descriptor::TensorView> otv = output.get_tensor_view(); shared_ptr<descriptor::TensorView> otv = output.get_tensor_view();
...@@ -789,45 +811,46 @@ void GPU_ExternalFunction::handle_output_alias( ...@@ -789,45 +811,46 @@ void GPU_ExternalFunction::handle_output_alias(
writer.indent++; writer.indent++;
for (size_t i = 1; i < outputs.size(); i++) for (size_t i = 1; i < outputs.size(); i++)
{ {
writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs[" writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>("
<< outputs[i] << "]), static_cast<void*>(outputs[" << outputs[0] "outputs["
<< "]), " << otv->get_tensor().size() << ");\n"; << outputs[i] << "]), static_cast<void*>(outputs["
<< outputs[0] << "]), " << otv->get_tensor().size()
<< ");\n";
} }
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
} }
} }
} }
shared_ptr<ngraph::runtime::CallFrame> GPU_ExternalFunction::make_call_frame() shared_ptr<ngraph::runtime::CallFrame> GPU_ExternalFunction::make_call_frame()
{ {
if (!m_is_compiled) if (!m_is_compiled)
{ {
compile(); compile();
} }
return make_shared<GPU_CallFrame>(shared_from_this(), return make_shared<GPU_CallFrame>(shared_from_this(), m_compiled_function);
m_compiled_function); }
}
void GPU_ExternalFunction::emit_debug_function_entry( void GPU_ExternalFunction::emit_debug_function_entry(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
Node* node, Node* node,
const std::vector<GPU_TensorViewWrapper>& in, const std::vector<GPU_TensorViewWrapper>& in,
const std::vector<GPU_TensorViewWrapper>& out) const std::vector<GPU_TensorViewWrapper>& out)
{ {
writer << "timer_" << node->get_name() << ".start();\n"; writer << "timer_" << node->get_name() << ".start();\n";
} }
void GPU_ExternalFunction::emit_debug_function_exit( void GPU_ExternalFunction::emit_debug_function_exit(
codegen::CodeWriter& writer, codegen::CodeWriter& writer,
Node* node, Node* node,
const std::vector<GPU_TensorViewWrapper>& in, const std::vector<GPU_TensorViewWrapper>& in,
const std::vector<GPU_TensorViewWrapper>& out) const std::vector<GPU_TensorViewWrapper>& out)
{ {
writer << "timer_" << node->get_name() << ".stop();\n"; writer << "timer_" << node->get_name() << ".stop();\n";
} }
} }
} }
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment