Commit e286e9d4 authored by Robert Kimball, committed by Scott Cyphers

wip (#1826)

parent 5a32dfe4
@@ -241,9 +241,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_timer_functions()
     m_writer << "// Declare debug timers\n";
     vector<string> names;
     size_t index = 0;
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+        for (shared_ptr<Node> node : p.second)
         {
             if (!node->is_parameter() && !node->is_constant())
             {
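Every emitter hunk in this commit makes the same substitution: instead of walking m_pass_manager.get_state().get_functions() and looking each function up with m_function_ordered_ops.at(current_function), the code iterates the cached map directly, with p.first naming the Function and p.second its ordered op list. Below is a minimal compilable sketch of that pattern; the stub types and the exact container type of m_function_ordered_ops are assumptions for illustration, not the real ngraph declarations.

    #include <list>
    #include <map>
    #include <memory>

    // Hypothetical stand-ins for ngraph::Function and ngraph::Node.
    struct Node {};
    struct Function {};

    // Assumed shape of the cache: each Function mapped to its ordered op list.
    using FunctionOrderedOps =
        std::map<std::shared_ptr<Function>, std::list<std::shared_ptr<Node>>>;

    void emit_all(const FunctionOrderedOps& function_ordered_ops)
    {
        // p.first is the Function, p.second its topologically ordered ops.
        for (const auto& p : function_ordered_ops)
        {
            for (const std::shared_ptr<Node>& node : p.second)
            {
                (void)node; // code generation for this op would happen here
            }
        }
    }

Besides dropping a map lookup per function, iterating the cache removes the emitters' dependence on pass-manager state, which is what allows the header change at the end of this diff to delete the m_pass_manager member.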
@@ -296,9 +296,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_timer_functions()
 void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
 {
     m_writer << "// Declare all constants\n";
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+        for (shared_ptr<Node> node : p.second)
         {
             const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
             if (c)
@@ -324,9 +324,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
     m_writer << "if(is_constant_mem_ptr_null)\n";
     m_writer.block_begin();
     {
-        for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+        for (const auto& p : m_function_ordered_ops)
         {
-            for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+            for (shared_ptr<Node> node : p.second)
             {
                 const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
                 if (c)
@@ -349,10 +349,10 @@ void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
 void runtime::gpu::GPU_ExternalFunction::emit_function_declarations()
 {
     m_writer << "// Declare all functions\n";
-    for (shared_ptr<Function> f : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        m_writer << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs, "
-                 << "gpu::GPURuntimeContext* ctx);\n";
+        m_writer << "extern \"C\" void " << p.first->get_name() << "(void** inputs, "
+                 << "void** outputs, gpu::GPURuntimeContext* ctx);\n";
     }
     m_writer << "\n";
 }
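In the emit_function_declarations hunk the change only re-wraps the string literals and swaps f->get_name() for p.first->get_name(); the declaration the writer emits is unchanged. For a function named, say, Function_0 (an illustrative name, not taken from this diff), the generated source would read:

    extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx);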
@@ -397,8 +397,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
 void runtime::gpu::GPU_ExternalFunction::emit_functions()
 {
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
+        auto current_function = p.first;
         set<string> output_names;
         for (shared_ptr<Node> op : current_function->get_results())
         {
@@ -557,36 +558,36 @@ void runtime::gpu::GPU_ExternalFunction::compile()
     auto allocator = std::make_shared<runtime::gpu::GPUAllocator>(
         m_shared_context->m_primitive_emitter->get_memory_allocator());
+    ngraph::pass::Manager pass_manager;
 #if CUDNN_VERSION >= 7200
     // recurrent network fusion
-    m_pass_manager.register_pass<runtime::gpu::pass::LSTMFusion>();
-    m_pass_manager.register_pass<runtime::gpu::pass::RNNFusion>();
-    m_pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
-    m_pass_manager.register_pass<runtime::gpu::pass::MultiLayerRNNFusion>();
+    pass_manager.register_pass<runtime::gpu::pass::LSTMFusion>();
+    pass_manager.register_pass<runtime::gpu::pass::RNNFusion>();
+    pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
+    pass_manager.register_pass<runtime::gpu::pass::MultiLayerRNNFusion>();
 #else
-    m_pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
+    pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
 #endif
-    m_pass_manager.register_pass<ngraph::pass::LikeReplacement>();
-    m_pass_manager
-        .register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
-    m_pass_manager.register_pass<runtime::gpu::pass::GPULayout>(this);
-    m_pass_manager.register_pass<ngraph::pass::Liveness>();
-    m_pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
-    m_pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
+    pass_manager.register_pass<ngraph::pass::LikeReplacement>();
+    pass_manager.register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
+    pass_manager.register_pass<runtime::gpu::pass::GPULayout>(this);
+    pass_manager.register_pass<ngraph::pass::Liveness>();
+    pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
+    pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
         *allocator, m_tensor_memory_buffers);
     std::string common_function_string;
     auto femitter = bind(&ngraph::runtime::gpu::GPU_ExternalFunction::emit_op_as_function,
                          this,
                          placeholders::_1,
                          placeholders::_2);
-    m_pass_manager.register_pass<ngraph::pass::CommonFunctionCollection>(
+    pass_manager.register_pass<ngraph::pass::CommonFunctionCollection>(
         femitter, m_node_function_map, common_function_string);
     string dump_filename = file_util::path_join(s_output_dir, m_function_name + "_ops.txt");
-    m_pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);
-    m_pass_manager.run_passes(m_function);
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);
+    pass_manager.run_passes(m_function);
+    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
     {
         m_function_ordered_ops.emplace(current_function, current_function->get_ordered_ops());
     }
...
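The compile() hunk is the heart of the commit: the pass::Manager becomes a local variable, all passes are registered and run inside compile(), and each function's ordered ops are snapshotted into m_function_ordered_ops before any emit_* stage runs. Below is a self-contained sketch of that lifetime, using hypothetical stubs rather than the real ngraph types:

    #include <list>
    #include <map>
    #include <memory>
    #include <vector>

    // Hypothetical stand-ins, for illustration only.
    struct Node {};
    struct Function
    {
        std::list<std::shared_ptr<Node>> get_ordered_ops() const { return {}; }
    };

    struct PassManagerState
    {
        std::vector<std::shared_ptr<Function>> get_functions() const { return m_functions; }
        std::vector<std::shared_ptr<Function>> m_functions;
    };

    struct PassManager // stand-in for ngraph::pass::Manager
    {
        void run_passes(const std::shared_ptr<Function>& f) { m_state.m_functions = {f}; }
        PassManagerState& get_state() { return m_state; }
        PassManagerState m_state;
    };

    struct ExternalFunction
    {
        std::shared_ptr<Function> m_function = std::make_shared<Function>();
        std::map<std::shared_ptr<Function>, std::list<std::shared_ptr<Node>>>
            m_function_ordered_ops;

        void compile()
        {
            PassManager pass_manager; // local now: no pass state outlives compile()
            // ... register_pass<...>() calls would go here ...
            pass_manager.run_passes(m_function);
            // Snapshot each function's ordered ops; the emit_* stages read only this cache.
            for (const auto& f : pass_manager.get_state().get_functions())
            {
                m_function_ordered_ops.emplace(f, f->get_ordered_ops());
            }
        }
    };

Once compile() returns, the manager and its state are gone; everything the emitters need lives in the cache, which is why the header hunk below can delete the m_pass_manager member.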
@@ -97,7 +97,6 @@ namespace ngraph
     static const std::string& get_header_source();
     codegen::CodeWriter m_writer;
-    ngraph::pass::Manager m_pass_manager;
     std::unique_ptr<codegen::Compiler> m_compiler;
     std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
...