Commit e286e9d4 authored by Robert Kimball, committed by Scott Cyphers

wip (#1826)

parent 5a32dfe4
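
The commit message is only a WIP marker, so a brief orientation, inferred from the diff itself: `compile()` now builds and runs a locally scoped `ngraph::pass::Manager` instead of the `m_pass_manager` member, caches each function's topologically ordered ops in `m_function_ordered_ops`, and the various `emit_*` stages switch to iterating that cached map; the now-unused member is then dropped from the class declaration.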
@@ -241,9 +241,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_timer_functions()
     m_writer << "// Declare debug timers\n";
     vector<string> names;
     size_t index = 0;
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+        for (shared_ptr<Node> node : p.second)
         {
             if (!node->is_parameter() && !node->is_constant())
             {
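
This pattern recurs throughout the commit: rather than asking the pass manager's state for the function list and then doing an `m_function_ordered_ops.at(current_function)` lookup per function, the loop walks the cached map directly, with `p.first` the function and `p.second` its ordered ops. A minimal sketch of the new shape, using simplified stand-in types rather than nGraph's real `Function`/`Node` classes:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
};

struct Function
{
    std::string name;
};

int main()
{
    // Stand-in for m_function_ordered_ops: each function's ops in topological order.
    std::map<std::shared_ptr<Function>, std::vector<std::shared_ptr<Node>>> ordered_ops;

    auto f = std::make_shared<Function>(Function{"main"});
    ordered_ops.emplace(f,
                        std::vector<std::shared_ptr<Node>>{
                            std::make_shared<Node>(Node{"Add_0"}),
                            std::make_shared<Node>(Node{"Multiply_1"})});

    // New pattern: p.first is the function, p.second its ordered ops;
    // no per-function .at() lookup is needed.
    for (const auto& p : ordered_ops)
    {
        for (const auto& node : p.second)
        {
            std::cout << p.first->name << ": " << node->name << "\n";
        }
    }
}
```

Besides severing the dependency on `m_pass_manager`, this drops the per-iteration `.at()` lookup on what is presumably an associative container.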
@@ -296,9 +296,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_timer_functions()
 void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
 {
     m_writer << "// Declare all constants\n";
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+        for (shared_ptr<Node> node : p.second)
         {
             const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
             if (c)
@@ -324,9 +324,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
     m_writer << "if(is_constant_mem_ptr_null)\n";
     m_writer.block_begin();
     {
-        for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+        for (const auto& p : m_function_ordered_ops)
         {
-            for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
+            for (shared_ptr<Node> node : p.second)
             {
                 const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
                 if (c)
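
Both constant-declaration hunks keep the same detection idiom: `dynamic_cast` on the node's raw pointer, which yields `nullptr` for any node that is not a `Constant`. A self-contained sketch of that idiom, with placeholder `Node`/`Constant` types standing in for the real op classes:

```cpp
#include <iostream>
#include <memory>

struct Node
{
    virtual ~Node() = default; // polymorphic base, required for dynamic_cast
};

struct Constant : Node
{
};

int main()
{
    std::shared_ptr<Node> node = std::make_shared<Constant>();

    // Same shape as the diff's:
    //   const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
    const Constant* c = dynamic_cast<Constant*>(node.get());
    if (c)
    {
        std::cout << "node is a Constant\n";
    }
}
```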
@@ -349,10 +349,10 @@ void runtime::gpu::GPU_ExternalFunction::emit_constant_declarations()
 void runtime::gpu::GPU_ExternalFunction::emit_function_declarations()
 {
     m_writer << "// Declare all functions\n";
-    for (shared_ptr<Function> f : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
-        m_writer << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs, "
-                 << "gpu::GPURuntimeContext* ctx);\n";
+        m_writer << "extern \"C\" void " << p.first->get_name() << "(void** inputs, "
+                 << "void** outputs, gpu::GPURuntimeContext* ctx);\n";
     }
     m_writer << "\n";
 }
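
Here the emitted text itself is unchanged apart from where the string literal wraps; the function name now comes from the map key via `p.first->get_name()`. For concreteness, a sketch of the kind of declarations this writer produces, using hypothetical function names and a plain `std::ostringstream` in place of nGraph's `CodeWriter`:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    // Hypothetical names; the real ones come from Function::get_name().
    std::vector<std::string> function_names{"gpu_func_0", "gpu_func_1"};

    std::ostringstream writer; // stand-in for codegen::CodeWriter
    for (const std::string& name : function_names)
    {
        writer << "extern \"C\" void " << name << "(void** inputs, "
               << "void** outputs, gpu::GPURuntimeContext* ctx);\n";
    }
    std::cout << writer.str();
}
```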
@@ -397,8 +397,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
 void runtime::gpu::GPU_ExternalFunction::emit_functions()
 {
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (const auto& p : m_function_ordered_ops)
     {
+        auto current_function = p.first;
         set<string> output_names;
         for (shared_ptr<Node> op : current_function->get_results())
         {
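
The new loop binds `p.first` to a named local so the rest of the body can keep using `current_function` untouched. With C++17 the pair could be unpacked in the loop header itself; the explicit local suggests the codebase targets an earlier standard. A sketch of that alternative, for comparison (compile with -std=c++17):

```cpp
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
};

struct Function
{
    std::string name;
};

int main()
{
    std::map<std::shared_ptr<Function>, std::vector<std::shared_ptr<Node>>> function_ordered_ops;
    function_ordered_ops.emplace(std::make_shared<Function>(Function{"main"}),
                                 std::vector<std::shared_ptr<Node>>{});

    // C++17 structured bindings: unpack the pair in the loop header.
    for (const auto& [current_function, ops] : function_ordered_ops)
    {
        (void)current_function; // used by the real emit body
        (void)ops;
    }
}
```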
@@ -557,36 +558,36 @@ void runtime::gpu::GPU_ExternalFunction::compile()
     auto allocator = std::make_shared<runtime::gpu::GPUAllocator>(
         m_shared_context->m_primitive_emitter->get_memory_allocator());
+    ngraph::pass::Manager pass_manager;
 #if CUDNN_VERSION >= 7200
     // recurrent network fusion
-    m_pass_manager.register_pass<runtime::gpu::pass::LSTMFusion>();
-    m_pass_manager.register_pass<runtime::gpu::pass::RNNFusion>();
-    m_pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
-    m_pass_manager.register_pass<runtime::gpu::pass::MultiLayerRNNFusion>();
+    pass_manager.register_pass<runtime::gpu::pass::LSTMFusion>();
+    pass_manager.register_pass<runtime::gpu::pass::RNNFusion>();
+    pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
+    pass_manager.register_pass<runtime::gpu::pass::MultiLayerRNNFusion>();
 #else
-    m_pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
+    pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
 #endif
-    m_pass_manager.register_pass<ngraph::pass::LikeReplacement>();
-    m_pass_manager
-        .register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
-    m_pass_manager.register_pass<runtime::gpu::pass::GPULayout>(this);
-    m_pass_manager.register_pass<ngraph::pass::Liveness>();
-    m_pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
-    m_pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
+    pass_manager.register_pass<ngraph::pass::LikeReplacement>();
+    pass_manager.register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
+    pass_manager.register_pass<runtime::gpu::pass::GPULayout>(this);
+    pass_manager.register_pass<ngraph::pass::Liveness>();
+    pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
+    pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
         *allocator, m_tensor_memory_buffers);
     std::string common_function_string;
     auto femitter = bind(&ngraph::runtime::gpu::GPU_ExternalFunction::emit_op_as_function,
                          this,
                          placeholders::_1,
                          placeholders::_2);
-    m_pass_manager.register_pass<ngraph::pass::CommonFunctionCollection>(
+    pass_manager.register_pass<ngraph::pass::CommonFunctionCollection>(
         femitter, m_node_function_map, common_function_string);
     string dump_filename = file_util::path_join(s_output_dir, m_function_name + "_ops.txt");
-    m_pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);
+    pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);
-    m_pass_manager.run_passes(m_function);
+    pass_manager.run_passes(m_function);
-    for (shared_ptr<Function> current_function : m_pass_manager.get_state().get_functions())
+    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
     {
         m_function_ordered_ops.emplace(current_function, current_function->get_ordered_ops());
     }
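
This hunk is the heart of the change: the pass manager becomes a local variable in `compile()`, and immediately after `run_passes` the per-function op ordering is copied into the `m_function_ordered_ops` member while the manager's state is still in scope. Everything the later `emit_*` stages need is captured in that map, so the manager itself no longer has to outlive `compile()`. A stand-in sketch of the flow (the `PassManager`/`Function` types are simplified placeholders, not nGraph's):

```cpp
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;
};

struct Function
{
    std::string name;
    std::vector<std::shared_ptr<Node>> ops;
    const std::vector<std::shared_ptr<Node>>& get_ordered_ops() const { return ops; }
};

// Stand-in for ngraph::pass::Manager: run_passes() records the functions it saw.
struct PassManager
{
    std::vector<std::shared_ptr<Function>> functions;
    void run_passes(const std::shared_ptr<Function>& f) { functions.push_back(f); }
    const std::vector<std::shared_ptr<Function>>& get_functions() const { return functions; }
};

int main()
{
    // Member-style cache that outlives the compile step.
    std::map<std::shared_ptr<Function>, std::vector<std::shared_ptr<Node>>> function_ordered_ops;

    auto f = std::make_shared<Function>();
    f->name = "main";
    f->ops = {std::make_shared<Node>(Node{"Add_0"})};

    PassManager pass_manager; // local: goes out of scope at the end of compile()
    pass_manager.run_passes(f);

    // Snapshot each function's op order while the manager's state is alive;
    // the emit_* stages iterate this map instead of the manager.
    for (const auto& fn : pass_manager.get_functions())
    {
        function_ordered_ops.emplace(fn, fn->get_ordered_ops());
    }
}
```

One consequence worth noting: the op order is snapshotted once at compile time rather than recomputed at each emission site.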
@@ -97,7 +97,6 @@ namespace ngraph
             static const std::string& get_header_source();
             codegen::CodeWriter m_writer;
-            ngraph::pass::Manager m_pass_manager;
             std::unique_ptr<codegen::Compiler> m_compiler;
             std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
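
This last hunk is from the class declaration (the header counterpart of the source file above): with `compile()` owning a local pass manager and every `emit_*` stage reading `m_function_ordered_ops`, the `m_pass_manager` member has no remaining users and is removed.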