Commit af7c81a3 authored by Scott Cyphers, committed by Robert Kimball

Remove internal functions (#3079)

parent 52c0827d
@@ -385,7 +385,6 @@ set (SRC
    pass/liveness.hpp
    pass/manager.cpp
    pass/manager.hpp
-   pass/manager_state.cpp
    pass/manager_state.hpp
    pass/memory_layout.cpp
    pass/memory_layout.hpp
......
@@ -108,33 +108,6 @@ void ngraph::traverse_nodes(const NodeVector& subgraph_results,
        }
    }

-void ngraph::traverse_functions(std::shared_ptr<ngraph::Function> p,
-                                std::function<void(shared_ptr<Function>)> f)
-{
-    std::unordered_set<shared_ptr<Function>> instances_seen;
-    deque<shared_ptr<Function>> stack;
-    stack.push_front(p);
-
-    while (stack.size() > 0)
-    {
-        shared_ptr<Function> func = stack.front();
-        if (instances_seen.find(func) == instances_seen.end())
-        {
-            instances_seen.insert(func);
-            f(func);
-        }
-        stack.pop_front();
-        for (shared_ptr<Node> op : func->get_ops())
-        {
-            for (shared_ptr<Function> fp : op->get_functions())
-            {
-                stack.push_front(fp);
-            }
-        }
-    }
-}
-
 NodeVector ngraph::find_common_args(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement)
 {
     std::unordered_set<std::shared_ptr<Node>> target_args;
......
@@ -70,8 +70,12 @@ namespace ngraph
                         bool include_control_deps,
                         const NodeVector& subgraph_params = {});

-    void traverse_functions(std::shared_ptr<Function> p,
-                            std::function<void(std::shared_ptr<Function>)> f);
+    inline void traverse_functions(std::shared_ptr<Function> p,
+                                   std::function<void(std::shared_ptr<Function>)> f)
+        NGRAPH_DEPRECATED("Replace with f(p)")
+    {
+        f(p);
+    };

     void replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement);
......
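Note: the header keeps traverse_functions only as an inline, NGRAPH_DEPRECATED shim that invokes the callback on the root function, since functions no longer nest. A minimal sketch (not part of the commit) of how an existing caller behaves now and how it should migrate:

    #include <iostream>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/graph_util.hpp"

    using namespace ngraph;

    void legacy_caller(std::shared_ptr<Function> f)
    {
        // Still compiles, but the compiler emits the NGRAPH_DEPRECATED message
        // "Replace with f(p)", and the lambda now runs exactly once, on f itself.
        traverse_functions(f, [](std::shared_ptr<Function> fn) {
            std::cout << fn->get_name() << "\n";
        });

        // Preferred form after this change: operate on f directly.
        std::cout << f->get_name() << "\n";
    }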
@@ -339,11 +339,6 @@ void Node::add_control_dependency(std::shared_ptr<Node> node)
     m_control_dependencies.insert(node);
 }

-std::vector<std::shared_ptr<Function>> Node::get_functions() const
-{
-    return std::vector<std::shared_ptr<Function>>{};
-}
-
 namespace ngraph
 {
     ostream& operator<<(ostream& out, const Node& node)
......
@@ -323,8 +323,6 @@ namespace ngraph
        // Will be replaced with an OutputVector version
        virtual std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const = 0;

-       virtual std::vector<std::shared_ptr<Function>> get_functions() const;
-
        /// True if this and node have one output with same element type and shape
        bool has_same_type(std::shared_ptr<const Node> node) const;
......
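For context: the base-class implementation removed above always returned an empty vector, and only ops that wrapped a nested Function overrode it. A hedged sketch of the kind of helper that can be deleted along with it (count_nested is hypothetical, not an nGraph API):

    #include <cstddef>
    #include <memory>
    #include "ngraph/node.hpp"

    using namespace ngraph;

    // Before this commit a pass could ask any node for nested functions:
    //     return op->get_functions().size();
    // After it, Node has no such accessor, so graph-wide logic can assume a
    // single flat function and the query reduces to a constant.
    size_t count_nested(const std::shared_ptr<Node>& /*op*/)
    {
        return 0;
    }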
@@ -61,27 +61,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func, bool transitive)
 {
     bool profile_enabled = getenv("NGRAPH_PROFILE_PASS_ENABLE") != nullptr;

-    vector<std::pair<shared_ptr<Function>, bool>> fs;
-    if (transitive)
-    {
-        // find all functions
-        traverse_functions(func, [&](shared_ptr<Function> f) {
-            fs.push_back(std::make_pair(f, f->is_dynamic()));
-        });
-    }
-    else
-    {
-        fs = {std::make_pair(func, func->is_dynamic())};
-    }
-    set<shared_ptr<Function>> tfs;
-    std::vector<shared_ptr<Function>> f_array;
-    for (auto f_pair : fs)
-    {
-        shared_ptr<Function> f = f_pair.first;
-        tfs.insert(f);
-        f_array.push_back(f);
-    }
-    get_state().set_functions(tfs);
+    get_state().set_function(func);
+    vector<std::pair<shared_ptr<Function>, bool>> fs{std::make_pair(func, func->is_dynamic())};
+    vector<shared_ptr<Function>> f_array{func};

     size_t index = 0;
     stopwatch pass_timer;
......
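A minimal caller-side sketch (the pass selection is assumed, not from this commit): run_passes keeps its signature, but pass state now tracks exactly one function, so the transitive flag no longer discovers additional functions to run on.

    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/pass/liveness.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;

    void run_example(std::shared_ptr<Function> f)
    {
        pass::Manager pm;
        pm.register_pass<pass::Liveness>(); // any registered pass works the same way
        pm.run_passes(f);                   // every pass runs on f and only on f
    }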
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/pass/manager_state.hpp"
using namespace std;
using namespace ngraph;
const vector<shared_ptr<Function>>& pass::ManagerState::get_functions()
{
return m_function_list;
}
@@ -41,15 +41,6 @@ namespace ngraph
 class ngraph::pass::ManagerState
 {
 public:
-    const std::vector<std::shared_ptr<Function>>& get_functions();
-
-    template <typename T>
-    void set_functions(const T& collection)
-    {
-        m_function_list.clear();
-        m_function_list.insert(m_function_list.begin(), collection.begin(), collection.end());
-    }
-
     void set_visualize_tree_ops_map(const visualize_tree_ops_map_t& ops_map)
     {
         m_visualize_tree_ops_map = ops_map;
@@ -60,7 +51,15 @@ public:
         return m_visualize_tree_ops_map;
     }

+    void set_function(const std::shared_ptr<Function> function) { m_function = function; }
+    std::shared_ptr<Function> get_function() const { return m_function; }
+    std::vector<std::shared_ptr<Function>> get_functions() const
+        NGRAPH_DEPRECATED("Use get_function()")
+    {
+        return {m_function};
+    }
 private:
-    std::vector<std::shared_ptr<Function>> m_function_list;
     visualize_tree_ops_map_t m_visualize_tree_ops_map;
+    std::shared_ptr<Function> m_function;
 };
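A short sketch of reading the new state from a backend's point of view (the surrounding setup is assumed): get_function() is the preferred accessor, while the deprecated get_functions() now just wraps the same pointer in a one-element vector.

    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;

    void inspect_state(pass::Manager& pm)
    {
        // Preferred: the single function the manager ran on.
        std::shared_ptr<Function> f = pm.get_state().get_function();

        // Deprecated: warns "Use get_function()" and returns {f}.
        // auto fs = pm.get_state().get_functions();

        if (f)
        {
            auto ops = f->get_ordered_ops(); // same list the backends consume below
            (void)ops;
        }
    }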
@@ -497,14 +497,7 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
                                        femitter, node_function_map, common_function_string);
    pass_manager.run_passes(m_function);

-   unordered_map<shared_ptr<Function>, list<shared_ptr<Node>>> function_ordered_ops;
-   // only one function is allowed
-   NGRAPH_CHECK(pass_manager.get_state().get_functions().size() == 1,
-                "only one function is allowed");
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       function_ordered_ops.insert({current_function, current_function->get_ordered_ops()});
-   }
+   list<shared_ptr<Node>> ordered_ops = m_function->get_ordered_ops();

    CodeWriter writer;
@@ -594,15 +587,12 @@ using namespace ngraph::runtime;
    writer << "// Declare debug timers\n";
    vector<string> names;
    size_t index = 0;
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
+   for (shared_ptr<Node> node : ordered_ops)
    {
        if (!node->is_parameter() && !node->is_constant())
        {
            names.push_back(node->get_name());
            m_name_index_map.insert({node->get_name(), index++});
        }
    }
-   }
    writer << "ngraph::stopwatch timers[" << names.size() << "];\n";
@@ -644,28 +634,25 @@ using namespace ngraph::runtime;
    }

    writer << "// Declare all constants\n";
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
+   for (shared_ptr<Node> node : ordered_ops)
    {
        ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
        if (c)
        {
            m_active_constants.push_back(node);
            shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
            string type = tv->get_element_type().c_type_string();
            writer << "static " << type << "* " << tv->get_name() << " = ((" << type << "*)("
                   << c->get_data_ptr() << "));\n";
            auto output_tensor = &node->get_output_tensor();
            auto tensor_set = get_tensor_set(output_tensor);
            // process all tensors in the set containing the output tensor of the constant
            for (auto& ele_t : tensor_set)
            {
                NGRAPH_CHECK(ele_t->get_pool_offset() == 0, "no offset set for constants");
                m_tensor_roles[ele_t->get_name()] = TensorRole::CONSTANT;
                m_variable_name_map[ele_t->get_name()] = output_tensor->get_name();
            }
        }
    }
-   }
@@ -694,379 +681,370 @@ using namespace ngraph::runtime;
    writer.block_end();
    writer << "\n";

-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       auto ordered_ops = function_ordered_ops.at(current_function);
    set<string> output_names;
-   for (shared_ptr<Node> op : current_function->get_results())
+   for (shared_ptr<Node> op : m_function->get_results())
    {
        shared_ptr<descriptor::Tensor> tv = op->get_output_tensor_ptr();
        output_names.insert(tv->get_name());
    }
    set<descriptor::Tensor*> constants;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (dynamic_cast<ngraph::op::Constant*>(node.get()))
        {
            shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
            constants.insert(tv.get());
        }
    }

    bool temporaries_used = false;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (node->liveness_new_list.size() > 0)
        {
            temporaries_used = true;
        }
    }
    if (temporaries_used)
    {
-       m_memory_buffer_sizes.push_back(current_function->get_temporary_pool_size());
+       m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
    }

    // Indexing for Control Flags
    std::map<std::string, size_t> tensor_index_map;
    std::map<std::string, size_t> param_index_map;
    size_t tensor_index = 0;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (!node->is_parameter() && !node->is_constant())
        {
            for (const descriptor::Input& input : node->get_inputs())
            {
                const descriptor::Output& output = input.get_output();
                shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
                tensor_index_map.insert({tv->get_name(), tensor_index++});
            }
        }
    }

-   writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";
+   writer << "bool " << m_function->get_name() << "_t_en[" << tensor_index << "];\n";

-   writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
+   writer << "extern \"C\" void " << m_function->get_name() << func_params << "\n";
    writer << "{\n";
    writer.indent++;

    //deserialize and build mkldnn primitives
    if (m_mkldnn_emitter->get_mkldnn_descriptors_size() > 0)
    {
        writer << "if (ctx->first_iteration)\n";
        writer.block_begin();
        writer << "// read in memory descriptors and build mkldnn primitives\n";
        writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
        writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
               << ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
               << ");\n";
        writer.block_end();
    }

    // Execution tracing support
-   if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+   if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
    {
        writer << "cpu::Timestamp start_ts;\n"
               << "int profiler_count = 0;\n\n";
    }

    if (temporaries_used)
    {
        writer << "size_t pool_base_ptr = (size_t) ctx->memory_buffers["
               << m_memory_buffer_sizes.size() - 1 << "]->get_ptr();\n";
        writer << "\n";
    }

-   writer << "bool* t_en = (bool*)" << current_function->get_name() << "_t_en;\n";
+   writer << "bool* t_en = (bool*)" << m_function->get_name() << "_t_en;\n";

    if (m_use_tbb)
    {
        writer << "\n";
        writer << "if (ctx->first_iteration) {\n";
        writer.indent++;
        writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
                  "flowgraph_node_start"
               << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
                  "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
    }

    // Add inputs to the variable name map
    size_t arg_index = 0;
-   for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
+   for (shared_ptr<ngraph::op::Parameter> param : m_function->get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            auto output_tensor = &param->get_outputs().at(i).get_tensor();
            param_index_map[output_tensor->get_name()] = arg_index;
            auto tensor_set = get_tensor_set(output_tensor);
            // process all tensors in the set containing the output tensor of the parameter
            for (auto& ele_t : tensor_set)
            {
                const element::Type& et = ele_t->get_element_type();
                string type = et.c_type_string();
                stringstream ss;
                ss << "(((" << type << "*)(inputs[" << arg_index << "])) + "
                   << ele_t->get_pool_offset() / et.size() << ")";
                m_variable_name_map[ele_t->get_name()] = ss.str();
                m_tensor_roles[ele_t->get_name()] = TensorRole::INPUT;
            }
            arg_index++;
        }
    }

    // Add temporaries to the variable name map
    if (temporaries_used)
    {
        for (auto& ele : bufferID_to_tensorSets)
        {
            if (ele.second.first == TensorRole::INTERMEDIATE)
            {
                for (auto& ele_t : ele.second.second)
                {
                    stringstream ss;
                    ss << "((" << ele_t->get_element_type().c_type_string()
                       << "*)(pool_base_ptr + " << ele_t->get_pool_offset() << "))";
                    m_variable_name_map[ele_t->get_name()] = ss.str();
                    m_tensor_roles[ele_t->get_name()] = TensorRole::INTERMEDIATE;
                }
            }
        }
    }

    // Add outputs to the variable name map
-   for (size_t i = 0; i < current_function->get_output_size(); ++i)
+   for (size_t i = 0; i < m_function->get_output_size(); ++i)
    {
-       shared_ptr<Node> op = current_function->get_output_op(i);
+       shared_ptr<Node> op = m_function->get_output_op(i);
        auto output_tensor = &op->get_output_tensor();
        auto tensor_set = get_tensor_set(output_tensor);
        // process all tensors in the set containing the output tensor of the result
        for (auto& ele_t : tensor_set)
        {
            const element::Type& et = ele_t->get_element_type();
            string type = et.c_type_string();
            stringstream ss;
            ss << "(((" << type << "*)(outputs[" << i << "])) + "
               << ele_t->get_pool_offset() / et.size() << ")";
            m_variable_name_map[ele_t->get_name()] = ss.str();
            m_tensor_roles[ele_t->get_name()] = TensorRole::OUTPUT;
        }
    }

    for (shared_ptr<Node> node : ordered_ops)
    {
        auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
                         // with shared pointers, which is fine here but clang doesn't like it.)
        auto handler = dispatcher.find(type_index(typeid(n)));
        if (handler == dispatcher.end())
        {
            throw unsupported_op(node->description());
        }
        vector<TensorViewWrapper> in;
        vector<string> node_input_names;
        vector<string> node_output_names;
        for (const descriptor::Input& input : node->get_inputs())
        {
            const descriptor::Output& output = input.get_output();
            shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
            in.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
            node_input_names.emplace_back(tv->get_name());
        }
        vector<TensorViewWrapper> out;
        for (const descriptor::Output& output : node->get_outputs())
        {
            shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
            out.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
            node_output_names.emplace_back(tv->get_name());
        }

        // Emit operation prologue
        if (!node->is_parameter() && !node->is_constant())
        {
-           if (current_function->get_name() == m_function_name)
+           if (m_function->get_name() == m_function_name)
            {
                m_op_attrs.emplace_back(node->description(), node_output_names, node_input_names);
            }
            if (m_use_tbb)
            {
                writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
                          "flowgraph_node_"
                       << node->get_name()
                       << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
                          "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
                writer.indent++;
            }
-           if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+           if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
            {
                writer << "start_ts = cpu::Clock::now();\n";
            }
        }

        if (!node->is_parameter() && !node->is_constant())
        {
            writer << "\n// " << node->get_name() << "(";
            vector<string> parameter_nodes = node_input_names;
            parameter_nodes.insert(
                parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
            writer << join(parameter_nodes);
            writer << ")\n";
        }

        // Emit operation body
        if (!node->is_parameter() && !node->is_constant())
        {
            emit_debug_function_entry(writer, node.get(), in, out);
        }

        // Op Control
        if (!node->is_parameter() && !node->is_constant())
        {
            writer << "if (ctx->first_iteration ";
            for (const descriptor::Input& input : node->get_inputs())
            {
                const descriptor::Output& output = input.get_output();
                shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
                auto input_name = tv->get_name();

                if (output.get_node()->is_parameter())
                {
                    writer << " || ctx->p_en[" << param_index_map[input_name] << "]";
                }
                else if (!output.get_node()->is_constant())
                {
                    writer << " || t_en[" << tensor_index_map[input_name] << "]";
                }
            }
            // Always enable nodes computing output tensors or nodes whose outputs might get
            // overwritten due to inplace kernels
            // TODO (jbobba) - Do we need to handle cacheability
            if (computes_result(node.get()) || possibly_overwritten(node.get()))
            {
                writer << " || 1";
            }
            writer << ") {\n";
            writer.indent++;
        }

        auto it = node_function_map.find(node.get());
        if (it == node_function_map.end())
        {
            handler->second(this, writer, node.get(), in, out);
        }
        else
        {
            string func_name =
                ngraph::pass::CommonFunctionCollection::create_function_name(*it->second);
            vector<string> names;
            for (const TensorViewWrapper& tv : in)
            {
                names.push_back(tv.get_name());
            }
            for (const TensorViewWrapper& tv : out)
            {
                names.push_back(tv.get_name());
            }
            writer << func_name << "(" << join(names) << ", ctx, cg_ctx);\n";
        }

        // skip multi-output nodes since they would be covered by GetOutputElement
        if (node->get_output_size() == 1 &&
            // skip non-FP nodes
            (node->get_element_type() == element::f32 || node->get_element_type() == element::f64))
        {
            // check inputs and constants?
            if ((!node->is_parameter() && !node->is_constant()) ||
                std::getenv("NGRAPH_CPU_CHECK_PARMS_AND_CONSTS"))
            {
                if (std::getenv("NGRAPH_CPU_NAN_CHECK"))
                {
                    generate_isnan_isinf_check(writer, node, out, "isnan");
                }
                if (std::getenv("NGRAPH_CPU_INF_CHECK"))
                {
                    generate_isnan_isinf_check(writer, node, out, "isinf");
                }
            }
        }

        // Emit operation epilogue
        if (!node->is_parameter() && !node->is_constant())
        {
            for (auto output_name : node_output_names)
            {
                writer << "t_en[" << tensor_index_map[output_name] << "] = true;\n";
            }
            writer.indent--;
            writer << "} else {\n";
            writer.indent++;
            for (auto output_name : node_output_names)
            {
                writer << "t_en[" << tensor_index_map[output_name] << "] = false;\n";
            }
            writer.indent--;
            writer << "}\n";
            emit_debug_function_exit(writer, node.get(), in, out);
-           if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+           if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
            {
                writer << "ctx->op_durations[profiler_count++] = "
                       << "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
                          "start_ts)).count();\n";
            }
            if (m_use_tbb)
            {
                writer.indent--;
                writer << "});\n";
            }
        }
    }

    if (m_use_tbb)
    {
        writer << "\n";
        // Build the flow graph
-       traverse_nodes(current_function, [&writer](shared_ptr<Node> n) {
+       traverse_nodes(m_function, [&writer](shared_ptr<Node> n) {
            if (!n->is_parameter() && !n->is_constant())
            {
                bool is_head = true;
                for (auto arg : n->get_arguments())
                {
                    if (!arg->is_parameter() && !arg->is_constant())
                    {
                        is_head = false;
                        writer << "tbb::flow::make_edge(*flowgraph_node_" << arg->get_name()
                               << ", *flowgraph_node_" << n->get_name() << ");\n";
                    }
                }
                if (is_head)
                {
                    writer << "tbb::flow::make_edge(*flowgraph_node_start"
                           << ", *flowgraph_node_" << n->get_name() << ");\n";
                }
            }
        });

        writer.indent--;
        writer << "}\n";

        // Execute the flow graph
        writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>"
                  "(&(*(cg_ctx->tbb_graph->begin()))))"
               << "->try_put(tbb::flow::continue_msg());\n";
        writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
    }
    writer << "ctx->first_iteration = false;\n";
    writer.indent--;
    // End generated function
    writer += "}\n\n";
-   }

    // TODO: Cleanup and make this a utility function
    string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
......
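The long hunk above applies one pattern throughout the CPU codegen path: drop the loop over pass_manager.get_state().get_functions(), take the ordered op list straight from m_function, and replace current_function with m_function. A condensed, hedged sketch of that pattern (emit_node stands in for the real per-op emitters):

    #include <list>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/node.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;
    using namespace std;

    // Hypothetical stand-in for the CPU backend's per-node code generation.
    static void emit_node(const shared_ptr<Node>& node) { (void)node; }

    void compile_example(pass::Manager& pass_manager, shared_ptr<Function> m_function)
    {
        pass_manager.run_passes(m_function);

        // Previously: a map from every function in get_state().get_functions()
        // to its ordered ops. Now: exactly one function, one op list.
        list<shared_ptr<Node>> ordered_ops = m_function->get_ordered_ops();
        for (const shared_ptr<Node>& node : ordered_ops)
        {
            if (!node->is_parameter() && !node->is_constant())
            {
                emit_node(node);
            }
        }
    }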
@@ -184,11 +184,7 @@ void runtime::gpu::GPUCompiledFunction::compile()
    string dump_filename = file_util::path_join(get_output_dir(), m_function_name + "_ops.txt");
    pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);

    pass_manager.run_passes(m_function);
-
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       m_function_ordered_ops.emplace(current_function, current_function->get_ordered_ops());
-   }
+   m_function_ordered_ops.emplace(m_function, m_function->get_ordered_ops());

    add_passes(pass_manager);
    emit();
......
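The GPU backend keeps its function-to-ops map, but after this change it only ever holds one entry, keyed by the compiled function. A hedged sketch with a local map standing in for the backend's member (its real type may differ):

    #include <list>
    #include <memory>
    #include <unordered_map>
    #include "ngraph/function.hpp"
    #include "ngraph/node.hpp"

    using namespace ngraph;
    using namespace std;

    void build_ordered_ops_map(const shared_ptr<Function>& m_function)
    {
        unordered_map<shared_ptr<Function>, list<shared_ptr<Node>>> function_ordered_ops;

        // Single entry; lookups that once iterated nested functions hit only this.
        function_ordered_ops.emplace(m_function, m_function->get_ordered_ops());
        auto& ops = function_ordered_ops.at(m_function);
        (void)ops;
    }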
@@ -340,33 +340,24 @@ static void serialize_to_cpio(ostream& out, shared_ptr<ngraph::Function> func, s
    cpio::Writer writer(out);
    writer.write(func->get_name(), j.c_str(), static_cast<uint32_t>(j.size()));

-   traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
-       traverse_nodes(const_cast<Function*>(f.get()),
-                      [&](shared_ptr<Node> node) {
-                          if (auto c = dynamic_pointer_cast<op::Constant>(node))
-                          {
-                              uint32_t size =
-                                  static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
-                                                        c->get_output_element_type(0).size());
-                              writer.write(c->get_name(), c->get_data_ptr(), size);
-                          }
-                      },
-                      true);
-   });
+   traverse_nodes(const_cast<Function*>(func.get()),
+                  [&](shared_ptr<Node> node) {
+                      if (auto c = dynamic_pointer_cast<op::Constant>(node))
+                      {
+                          uint32_t size =
+                              static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
+                                                    c->get_output_element_type(0).size());
+                          writer.write(c->get_name(), c->get_data_ptr(), size);
+                      }
+                  },
+                  true);
 }
 #endif

 static string serialize(shared_ptr<ngraph::Function> func, size_t indent, bool binary_constant_data)
 {
     json j;
-    vector<json> functions;
-    traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
-        functions.push_back(write(*f, binary_constant_data));
-    });
-    for (auto it = functions.rbegin(); it != functions.rend(); it++)
-    {
-        j.push_back(*it);
-    }
+    j.push_back(write(*func, binary_constant_data));

     string rc;
     if (indent == 0)
......
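A small usage sketch (the public serialize() entry point and the indent value are assumed from the serializer header): with nested functions gone, the emitted JSON array holds a single function object instead of one object per nested function in reverse traversal order.

    #include <iostream>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/serializer.hpp"

    using namespace ngraph;

    void dump_json(std::shared_ptr<Function> f)
    {
        std::string js = serialize(f, 4); // JSON array with exactly one entry: f
        std::cout << js << std::endl;
    }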