Unverified commit abdbe348, authored by Jai Menon, committed by GitHub

Merge pull request #236 from NervanaSystems/bob/codewriter4

Bob/codewriter4
parents 797b2796 28795d7f
@@ -21,6 +21,7 @@ set (SRC
     descriptor/tensor.cpp
     descriptor/tensor_view.cpp
     descriptor/tuple.cpp
+    file_util.cpp
     function.cpp
     log.cpp
     node.cpp
@@ -53,7 +54,6 @@ set (SRC
     ops/tuple.cpp
     ops/unary_elementwise_arithmetic.cpp
     ops/unary_elementwise.cpp
-    pass/collect_functions.cpp
     pass/dump_sorted.cpp
     pass/liveness.cpp
     pass/manager.cpp
@@ -104,6 +104,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
     # Add sources for the CPU backend
     # and all its dependencies
     set(SRC ${SRC}
+        codegen/code_writer.cpp
         codegen/compiler.cpp
         runtime/cpu/call_frame.cpp
         runtime/cpu/cpu_backend.cpp
@@ -111,6 +112,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
         runtime/cpu/cpu_kernels.cpp
        runtime/cpu/emitter.cpp
        runtime/cpu/external_function.cpp
+       runtime/cpu/tensor_view.cpp
    )
    # LLVM binary builds are typically built without RTTI
    # The built-in headers are in a version-specific directory
......
@@ -12,22 +12,23 @@
 // See the License for the specific language governing permissions and
 // ----------------------------------------------------------------------------
-#pragma once
-#include "ngraph/pass/pass.hpp"
-
-namespace ngraph
-{
-    namespace pass
-    {
-        class CollectFunctions;
-    }
-}
-
-class ngraph::pass::CollectFunctions : public FunctionPass
-{
-public:
-    bool run_on_function(std::shared_ptr<ngraph::Function>) override;
-
-private:
-};
+#include "code_writer.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+codegen::CodeWriter::CodeWriter()
+    : indent(0)
+    , m_pending_indent(true)
+{
+}
+
+string codegen::CodeWriter::get_code() const
+{
+    return m_ss.str();
+}
+
+void codegen::CodeWriter::operator+=(const std::string& s)
+{
+    *this << s;
+}
@@ -12,39 +12,61 @@
 // See the License for the specific language governing permissions and
 // ----------------------------------------------------------------------------
-#include "ngraph/pass/collect_functions.hpp"
-#include "ngraph/function.hpp"
+#include <sstream>
+#include <string>
+
 #include "ngraph/log.hpp"
-#include "ngraph/node.hpp"
-#include "ngraph/ops/function_call.hpp"
-#include "ngraph/ops/op.hpp"
-#include "ngraph/util.hpp"
-
-using namespace std;
-using namespace ngraph;
-using namespace ngraph::pass;
-
-bool CollectFunctions::run_on_function(shared_ptr<ngraph::Function> func)
-{
-    set<shared_ptr<ngraph::Function>> functions;
-    deque<shared_ptr<ngraph::Function>> stack;
-    stack.push_back(func);
-
-    while (stack.empty() == false)
-    {
-        shared_ptr<ngraph::Function> f = stack.front();
-        stack.pop_front();
-        functions.insert(f);
-        traverse_nodes(f, [&](shared_ptr<Node> node) {
-            shared_ptr<op::FunctionCall> fc = dynamic_pointer_cast<op::FunctionCall>(node);
-            if (fc)
-            {
-                stack.push_back(fc->get_function());
-            }
-        });
-    }
-
-    get_state().set_functions(functions);
-
-    return false;
-}
+#pragma once
+
+namespace ngraph
+{
+    namespace codegen
+    {
+        class CodeWriter;
+    }
+}
+
+class ngraph::codegen::CodeWriter
+{
+public:
+    CodeWriter();
+    std::string get_code() const;
+    void operator+=(const std::string&);
+
+    size_t indent;
+
+    template <typename T>
+    friend CodeWriter& operator<<(CodeWriter& out, const T& obj)
+    {
+        std::stringstream ss;
+        ss << obj;
+
+        for (char c : ss.str())
+        {
+            if (c == '\n')
+            {
+                out.m_pending_indent = true;
+            }
+            else
+            {
+                if (out.m_pending_indent)
+                {
+                    out.m_pending_indent = false;
+                    for (size_t i = 0; i < out.indent; i++)
+                    {
+                        out.m_ss << "    ";
+                    }
+                }
+            }
+            out.m_ss << c;
+        }
+        return out;
+    }
+
+private:
+    std::stringstream m_ss;
+    bool m_pending_indent;
+};
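For readers skimming the diff, here is a minimal standalone sketch of how the new CodeWriter is meant to be used (hypothetical snippet; it assumes the repo's include paths and mirrors how the emitter uses the class later in this merge request):

```cpp
#include <iostream>
#include "ngraph/codegen/code_writer.hpp"

int main()
{
    ngraph::codegen::CodeWriter writer;

    // operator<< accepts anything streamable; indentation is applied
    // automatically at the start of each new line, one level per
    // 'indent' increment.
    writer << "void f()\n";
    writer << "{\n";
    writer.indent++;
    writer << "int x = 42;\n"; // emitted indented one level
    writer.indent--;
    writer << "}\n";

    std::cout << writer.get_code();
    return 0;
}
```

The design keeps the emitter code free of manual whitespace bookkeeping: the writer tracks a pending-indent flag and only pays the indentation cost when a non-newline character begins a line.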
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // ----------------------------------------------------------------------------
+#include <iostream>
+
 #include <clang/CodeGen/ObjectFilePCHContainerOperations.h>
 #include <clang/Driver/DriverDiagnostic.h>
 #include <clang/Driver/Options.h>
@@ -191,11 +193,15 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
     // Create and execute action
     CodeGenAction* compilerAction = new EmitCodeGenOnlyAction();
-    Clang->ExecuteAction(*compilerAction);
+    std::unique_ptr<llvm::Module> rc;
+    if (Clang->ExecuteAction(*compilerAction) == true)
+    {
+        rc = compilerAction->takeModule();
+    }

     buffer.release();

-    return compilerAction->takeModule();
+    return rc;
 }

 bool execution_state::add_module(std::unique_ptr<llvm::Module>& module)
......
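With this change, compile() returns an empty unique_ptr when Clang fails instead of handing back a module from a failed action. Callers therefore must null-check before use. A hedged sketch of the calling pattern (it mirrors what external_function.cpp does later in this diff; `source_string` is a placeholder for the generated code):

```cpp
// Sketch only: assumes ngraph::codegen::execution_state from compiler.hpp.
ngraph::codegen::execution_state estate;
auto llvm_module = estate.compile(source_string, "example_codegen.cpp");
if (llvm_module == nullptr)
{
    // Compilation failed; no module was produced.
    throw std::runtime_error("function failed to compile");
}
estate.add_module(llvm_module);
estate.finalize();
```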
@@ -59,9 +59,9 @@ public:
     size_t size() const;
     void set_pool_offset(size_t);
     size_t get_pool_offset() const;
+    const element::Type& get_element_type() const { return m_element_type; }
     static std::string make_tensor_name(const Node* node, size_t value_index);
+    void set_is_output() { m_is_output = true; }

 protected:
     const element::Type& m_element_type;
     PrimaryTensorView* m_primary_tensor_view;
......
/*
Copyright 2016 Nervana Systems Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <cassert>
#include <dirent.h>
#include <fcntl.h>
#include <fstream>
#include <ftw.h>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
#include "file_util.hpp"
using namespace std;
string ngraph::file_util::path_join(const string& s1, const string& s2)
{
string rc;
if (s2.size() > 0)
{
if (s2[0] == '/')
{
rc = s2;
}
else if (s1.size() > 0)
{
rc = s1;
if (rc[rc.size() - 1] != '/')
{
rc += "/";
}
rc += s2;
}
else
{
rc = s2;
}
}
else
{
rc = s1;
}
return rc;
}
size_t ngraph::file_util::get_file_size(const string& filename)
{
// ensure that filename exists and get its size
struct stat stats;
if (stat(filename.c_str(), &stats) == -1)
{
throw std::runtime_error("Could not find file: \"" + filename + "\"");
}
return stats.st_size;
}
void ngraph::file_util::remove_directory(const string& dir)
{
file_util::iterate_files(dir,
[](const string& file, bool is_dir) {
if (is_dir)
rmdir(file.c_str());
else
remove(file.c_str());
},
true);
rmdir(dir.c_str());
}
void ngraph::file_util::remove_file(const string& file)
{
remove(file.c_str());
}
bool ngraph::file_util::make_directory(const string& dir)
{
if (mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH))
{
if (errno == EEXIST)
{
// not really an error, the directory already exists
return false;
}
throw std::runtime_error("error making directory " + dir + " " + strerror(errno));
}
return true;
}
string ngraph::file_util::make_temp_directory(const string& path)
{
string fname = path.empty() ? file_util::get_temp_directory() : path;
string tmp_template = file_util::path_join(fname, "aeonXXXXXX");
char* tmpname = strdup(tmp_template.c_str());
mkdtemp(tmpname);
string rc = tmpname;
free(tmpname);
return rc;
}
std::string ngraph::file_util::get_temp_directory()
{
const vector<string> potential_tmps = {"NERVANA_AEON_TMP", "TMPDIR", "TMP", "TEMP", "TEMPDIR"};
const char* path = nullptr;
for (const string& var : potential_tmps)
{
path = getenv(var.c_str());
if (path != nullptr)
{
break;
}
}
if (path == nullptr)
{
path = "/tmp";
}
return path;
}
vector<char> ngraph::file_util::read_file_contents(const string& path)
{
size_t file_size = get_file_size(path);
vector<char> data;
data.reserve(file_size);
data.resize(file_size);
FILE* f = fopen(path.c_str(), "rb");
if (f)
{
char* p = data.data();
size_t remainder = file_size;
size_t offset = 0;
while (remainder > 0)
{
size_t rc = fread(&p[offset], 1, remainder, f);
offset += rc;
remainder -= rc;
}
fclose(f);
}
else
{
throw std::runtime_error("error opening file '" + path + "'");
}
return data;
}
std::string ngraph::file_util::read_file_to_string(const std::string& path)
{
std::ifstream f(path);
std::stringstream ss;
ss << f.rdbuf();
return ss.str();
}
void ngraph::file_util::iterate_files(const string& path,
std::function<void(const string& file, bool is_dir)> func,
bool recurse)
{
vector<string> files;
vector<string> dirs;
file_util::iterate_files_worker(path,
[&files, &dirs](const string& file, bool is_dir) {
if (is_dir)
dirs.push_back(file);
else
files.push_back(file);
},
true);
for (auto f : files)
func(f, false);
for (auto f : dirs)
func(f, true);
}
void ngraph::file_util::iterate_files_worker(
const string& path, std::function<void(const string& file, bool is_dir)> func, bool recurse)
{
DIR* dir;
struct dirent* ent;
if ((dir = opendir(path.c_str())) != nullptr)
{
while ((ent = readdir(dir)) != nullptr)
{
string name = ent->d_name;
switch (ent->d_type)
{
case DT_DIR:
if (recurse && name != "." && name != "..")
{
string dir_path = file_util::path_join(path, name);
iterate_files(dir_path, func, recurse);
func(dir_path, true);
}
break;
case DT_LNK: break;
case DT_REG:
name = file_util::path_join(path, name);
func(name, false);
break;
default: break;
}
}
closedir(dir);
}
else
{
throw std::runtime_error("error enumerating file " + path);
}
}
string ngraph::file_util::tmp_filename(const string& extension)
{
string tmp_template =
file_util::path_join(file_util::get_temp_directory(), "ngraph_XXXXXX" + extension);
char* tmpname = strdup(tmp_template.c_str());
// mkstemp opens the file with open() so we need to close it
close(mkstemps(tmpname, static_cast<int>(extension.size())));
string rc = tmpname;
free(tmpname);
return rc;
}
void ngraph::file_util::touch(const std::string& filename)
{
// inspired by http://chris-sharpe.blogspot.com/2013/05/better-than-systemtouch.html
int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_NOCTTY | O_NONBLOCK, 0666);
assert(fd >= 0);
close(fd);
// update timestamp for filename
int rc = utimes(filename.c_str(), nullptr);
assert(!rc);
}
bool ngraph::file_util::exists(const std::string& filename)
{
struct stat buffer;
return (stat(filename.c_str(), &buffer) == 0);
}
int ngraph::file_util::try_get_lock(const std::string& filename)
{
mode_t m = umask(0);
int fd = open(filename.c_str(), O_RDWR | O_CREAT, 0666);
umask(m);
if (fd >= 0 && flock(fd, LOCK_EX | LOCK_NB) < 0)
{
close(fd);
fd = -1;
}
return fd;
}
void ngraph::file_util::release_lock(int fd, const std::string& filename)
{
if (fd >= 0)
{
remove_file(filename);
close(fd);
}
}
/*
Copyright 2016 Nervana Systems Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <functional>
#include <string>
#include <vector>
namespace ngraph
{
class file_util;
}
class ngraph::file_util
{
public:
static std::string path_join(const std::string& s1, const std::string& s2);
static size_t get_file_size(const std::string& filename);
static void remove_directory(const std::string& dir);
static bool make_directory(const std::string& dir);
static std::string make_temp_directory(const std::string& path = "");
static std::string get_temp_directory();
static void remove_file(const std::string& file);
static std::vector<char> read_file_contents(const std::string& path);
static std::string read_file_to_string(const std::string& path);
static void iterate_files(const std::string& path,
std::function<void(const std::string& file, bool is_dir)> func,
bool recurse = false);
static std::string tmp_filename(const std::string& extension = "");
static void touch(const std::string& filename);
static bool exists(const std::string& filename);
static int try_get_lock(const std::string& filename);
static void release_lock(int fd, const std::string& filename);
private:
static void iterate_files_worker(const std::string& path,
std::function<void(const std::string& file, bool is_dir)> func,
bool recurse = false);
};
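A small usage sketch of the new file_util helpers (hypothetical paths; the API is exactly what the header above declares):

```cpp
#include <iostream>
#include "ngraph/file_util.hpp"

using namespace ngraph;

int main()
{
    // Create a scratch directory, e.g. /tmp/aeonXXXXXX expanded by mkdtemp.
    std::string dir = file_util::make_temp_directory();

    // Join path components; a leading '/' in the second argument wins.
    std::string path = file_util::path_join(dir, "example.txt");

    file_util::touch(path);                             // create an empty file
    std::cout << file_util::get_file_size(path) << "\n"; // prints 0

    std::string text = file_util::read_file_to_string(path);

    file_util::remove_directory(dir); // recursively delete the scratch dir
    return 0;
}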
@@ -33,6 +33,7 @@ Function::Function(const std::shared_ptr<Node>& result,
     , m_ordered_ops_valid(false)
     , m_instance_id(m_next_instance_id.fetch_add(1))
 {
+    m_result->set_is_output();
     traverse_nodes(this, [&](shared_ptr<Node> node) { m_ops.push_back(node); });
 }
......
@@ -118,6 +118,10 @@ bool Node::is_output() const
 void Node::set_is_output()
 {
     m_is_output = true;
+    for (descriptor::Output& output : get_outputs())
+    {
+        output.get_tensor().set_is_output();
+    }
 }

 std::string Node::get_node_id() const
......
@@ -35,6 +35,9 @@ bool pass::DumpSorted::run_on_module(vector<shared_ptr<ngraph::Function>>& funct
 {
     for (shared_ptr<Function> f : functions)
     {
+        out << "=====================================================================\n";
+        out << f->get_name() << " start\n";
+        out << "=====================================================================\n";
         for (const shared_ptr<Node>& node : f->get_ordered_ops())
         {
             out << node->get_name() << "(";
@@ -67,6 +70,9 @@ bool pass::DumpSorted::run_on_module(vector<shared_ptr<ngraph::Function>>& funct
                 out << "    F " << tensor->get_name() << "\n";
             }
         }
+        out << "=====================================================================\n";
+        out << f->get_name() << " end\n";
+        out << "=====================================================================\n";
     }
 }
......
@@ -101,7 +101,6 @@ bool pass::Liveness::run_on_call_graph(list<shared_ptr<Node>>& ops)
     }
     for (Tensor* tensor : outputs)
     {
-        NGRAPH_INFO << "found output";
         node->liveness_live_list.insert(tensor);
         node->liveness_free_list.erase(tensor);
......
@@ -18,6 +18,8 @@
 #include "ngraph/function.hpp"
 #include "ngraph/log.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/ops/function_call.hpp"
+#include "ngraph/ops/reduce.hpp"
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/pass.hpp"
@@ -36,10 +38,37 @@ void ngraph::pass::Manager::initialize_default_passes()
 {
 }

+static void find_functions(shared_ptr<Function> f, set<shared_ptr<Function>>& funcs)
+{
+    funcs.insert(f);
+    for (shared_ptr<Node> node : f->get_ops())
+    {
+        shared_ptr<op::FunctionCall> fc = dynamic_pointer_cast<op::FunctionCall>(node);
+        if (fc)
+        {
+            find_functions(fc->get_function(), funcs);
+        }
+        shared_ptr<op::Reduce> reduce = dynamic_pointer_cast<op::Reduce>(node);
+        if (reduce)
+        {
+            find_functions(reduce->get_reduction_function(), funcs);
+        }
+    }
+}
+
 void ngraph::pass::Manager::run_passes(shared_ptr<Function> func)
 {
-    vector<shared_ptr<Function>> fs = {func};
-    get_state().set_functions(fs);
+    // find all functions
+    set<shared_ptr<Function>> tfs;
+    find_functions(func, tfs);
+    get_state().set_functions(tfs);
+
+    vector<shared_ptr<Function>> fs;
+    for (shared_ptr<Function> f : get_state().get_functions())
+    {
+        fs.push_back(f);
+    }

     for (shared_ptr<PassBase> pass : m_pass_list)
     {
@@ -77,40 +106,6 @@ void ngraph::pass::Manager::run_passes(shared_ptr<Function> func)
         }
     }
-
-    // for (shared_ptr<ModulePass>& p : m_module_passes)
-    // {
-    //     p->set_state(get_state());
-    //     p->run_on_module(fs);
-    // }
-    // for (Function* f : fs)
-    // {
-    //     for (shared_ptr<FunctionPass> p : m_function_passes)
-    //     {
-    //         p->set_state(get_state());
-    //         p->run_on_function(f);
-    //     }
-    // }
-    // for (Function* f : fs)
-    // {
-    //     NGRAPH_INFO;
-    //     for (shared_ptr<NodePass> p : m_node_passes)
-    //     {
-    //         for (Node* node : f->get_ops())
-    //         {
-    //             NGRAPH_INFO;
-    //             p->set_state(get_state());
-    //             p->run_on_node(node);
-    //         }
-    //     }
-    // }
-    // for (shared_ptr<CallGraphPass>& p : m_call_graph_passes)
-    // {
-    //     p->set_state(get_state());
-    //     p->run_on_call_graph(func->get_ordered_ops());
-    // }
 }

 ngraph::pass::ManagerState& ngraph::pass::Manager::get_state()
......
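Since run_passes() now discovers nested functions itself, the CollectFunctions pass deleted above is no longer needed: callers simply run the manager and read the function set back from its state. A hedged sketch (assumes `f` is an already-built ngraph::Function):

```cpp
// Any functions reached through op::FunctionCall or op::Reduce nodes
// in 'f' are collected automatically before the registered passes run.
pass::Manager pass_manager;
pass_manager.register_pass<pass::TopologicalSort>();
pass_manager.run_passes(f);

for (const shared_ptr<Function>& fn : pass_manager.get_state().get_functions())
{
    NGRAPH_INFO << fn->get_name();
}
```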
@@ -23,7 +23,7 @@
 using namespace std;
 using namespace ngraph;

-vector<shared_ptr<Function>>& ngraph::pass::ManagerState::get_functions()
+const vector<shared_ptr<Function>>& ngraph::pass::ManagerState::get_functions()
 {
     return m_function_list;
 }
......
@@ -30,7 +30,7 @@ namespace ngraph
 class ngraph::pass::ManagerState
 {
 public:
-    std::vector<std::shared_ptr<Function>>& get_functions();
+    const std::vector<std::shared_ptr<Function>>& get_functions();

     template <typename T>
     void set_functions(const T& collection)
......
@@ -26,9 +26,14 @@ using namespace std;
 using namespace ngraph;
 using namespace ngraph::descriptor;

+pass::MemoryLayout::MemoryLayout(size_t alignment)
+    : m_alignment(alignment)
+{
+}
+
 bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& node_list)
 {
-    MemoryManager mm;
+    MemoryManager mm(m_alignment);
     for (shared_ptr<Node> node : node_list)
     {
         for (Tensor* tensor : node->liveness_new_list)
......
@@ -33,9 +33,11 @@ namespace ngraph
 class ngraph::pass::MemoryLayout : public CallGraphPass
 {
 public:
+    MemoryLayout(size_t alignment = 1);
     virtual bool run_on_call_graph(std::list<std::shared_ptr<Node>>&) override;

 private:
+    size_t m_alignment;
 };

 class ngraph::pass::MemoryManager
......
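The alignment parameter flows from the pass into MemoryManager, so a backend can request, say, 64-byte-aligned pool offsets. This matches how the CPU backend registers the pass later in this diff (Liveness must run first so that liveness_new_list is populated):

```cpp
pass::Manager pass_manager;
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>(64); // 64-byte alignment for the CPU pool
pass_manager.run_passes(f); // assumes 'f' is an existing ngraph::Function
```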
@@ -17,6 +17,7 @@
 #include <memory>

 #include "ngraph/common.hpp"
+#include "ngraph/log.hpp"
 #include "ngraph/runtime/ndarray.hpp"

 namespace ngraph
......
@@ -15,50 +15,51 @@
 #include <algorithm>

 #include "call_frame.hpp"
+#include "ngraph/runtime/cpu/tensor_view.hpp"

 using namespace std;
 using namespace ngraph::runtime::cpu;

-CallFrame::CallFrame(EntryPoint compiled_function,
-                     size_t n_outputs,
-                     size_t n_inputs,
-                     const TensorViewPtrs& temps,
-                     const std::vector<std::shared_ptr<CallFrame>>& callees)
-    : m_n_outputs(n_outputs)
-    , m_n_inputs(n_inputs)
-    , m_tensor_views(n_outputs + n_inputs + temps.size())
-    , m_compiled_function(compiled_function)
-    , m_callees(callees)
+CallFrame::CallFrame(EntryPoint compiled_function)
+    : m_compiled_function(compiled_function)
 {
-    copy(temps.begin(), temps.end(), m_tensor_views.begin() + m_n_outputs + m_n_inputs);
 }

 void CallFrame::tensor_call(
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& inputs,
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& outputs)
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
 {
-    copy(outputs.begin(), outputs.end(), m_tensor_views.begin());
-    copy(inputs.begin(), inputs.end(), m_tensor_views.begin() + m_n_outputs);
+    vector<void*> inputs;
+    vector<void*> outputs;
+    for (size_t i = 0; i < input_tvs.size(); i++)
+    {
+        shared_ptr<runtime::cpu::CPUTensorView> tv =
+            static_pointer_cast<runtime::cpu::CPUTensorView>(input_tvs[i]);
+        inputs.push_back(tv->get_data_ptr());
+    }
+    for (size_t i = 0; i < output_tvs.size(); i++)
+    {
+        shared_ptr<runtime::cpu::CPUTensorView> tv =
+            static_pointer_cast<runtime::cpu::CPUTensorView>(output_tvs[i]);
+        outputs.push_back(tv->get_data_ptr());
+    }

     // Invoke compiled computation
-    m_compiled_function(this, m_tensor_views, m_callees);
-
-    // Don't hold onto inputs/outputs
-    fill_n(m_tensor_views.begin(), m_n_outputs + m_n_inputs, nullptr);
+    m_compiled_function(inputs, outputs);
 }

 void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
                            const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
 {
     // TODO: Check types of args and result
-    std::vector<std::shared_ptr<ngraph::runtime::TensorView>> inputs;
-    for (auto argument : arguments)
+    vector<shared_ptr<ngraph::runtime::TensorView>> inputs;
+    for (shared_ptr<ngraph::runtime::Value> argument : arguments)
     {
         argument->collect_tensor_views(inputs, argument);
     }
-    std::vector<std::shared_ptr<ngraph::runtime::TensorView>> outputs;
-    for (auto result : results)
+    vector<shared_ptr<ngraph::runtime::TensorView>> outputs;
+    for (shared_ptr<ngraph::runtime::Value> result : results)
     {
         result->collect_tensor_views(outputs, result);
     }
......
@@ -32,19 +32,16 @@ namespace ngraph
         {
             class CallFrame;

-            using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*,
-                                                  ngraph::runtime::TensorViewPtrs&,
-                                                  const std::vector<std::shared_ptr<CallFrame>>&)>;
+            using EntryPoint_t = void(const std::vector<void*>& inputs,
+                                      const std::vector<void*>& outputs);
+            using EntryPoint = std::function<EntryPoint_t>;

             // Compile and execute graphs
             class CallFrame : public ngraph::runtime::CallFrame
             {
             public:
-                CallFrame(EntryPoint compiled_function,
-                          size_t n_outputs,
-                          size_t n_inputs,
-                          const TensorViewPtrs& temps,
-                          const std::vector<std::shared_ptr<CallFrame>>& callees);
+                CallFrame(EntryPoint compiled_function);

                 /// @brief Invoke the function with values matching the signature of the function.
                 ///
@@ -53,30 +50,13 @@ namespace ngraph
                 operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
                            const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);

-                /// @brief Invoke the function with tuples pre-expanded to their underlying tensor views.
-                void tensor_call(const TensorViewPtrs& inputs, const TensorViewPtrs& outputs);
-
-                void set_return() { m_return = true; }
-                std::shared_ptr<TensorView> get_tensor_view(size_t i) { return m_tensor_views[i]; }
-
-                template <typename ET>
-                ParameterizedTensorView<ET>* get_parameterized_tensor_view(size_t i)
-                {
-                    return m_tensor_views[i]->get_parameterized_tensor_view<ET>();
-                }
-
-                template <typename ET>
-                typename ET::type* get_tensor_view_data(size_t i)
-                {
-                    return &get_parameterized_tensor_view<ET>(i)->get_vector()[0];
-                }
+                /// @brief Invoke the function with tuples pre-expanded to their underlying
+                /// tensor views.
+                void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
+                                 const std::vector<std::shared_ptr<TensorView>>& outputs);

             protected:
-                size_t m_n_outputs;
-                size_t m_n_inputs;
-                TensorViewPtrs m_tensor_views;
-                bool m_return;
                 EntryPoint m_compiled_function;
-                std::vector<std::shared_ptr<CallFrame>> m_callees;
             };
         }
     }
......
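The new EntryPoint_t signature decouples generated code from CallFrame: a compiled function only ever sees raw pointers. A minimal sketch of a hand-written function with the same shape (hypothetical; in practice this code is emitted and JIT-compiled):

```cpp
#include <cstddef>
#include <vector>

// Same shape as ngraph::runtime::cpu::EntryPoint_t.
extern "C" void example_add(const std::vector<void*>& inputs,
                            const std::vector<void*>& outputs)
{
    const float* a = static_cast<const float*>(inputs[0]);
    const float* b = static_cast<const float*>(inputs[1]);
    float* out = static_cast<float*>(outputs[0]);
    for (size_t i = 0; i < 4; i++) // assumes 4-element tensors
    {
        out[i] = a[i] + b[i];
    }
}
```

Such a function can then be wrapped directly: `ngraph::runtime::cpu::CallFrame cf(example_add);` and tensor_call() takes care of unwrapping CPUTensorViews into the void* vectors.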
@@ -13,12 +13,22 @@
 // ----------------------------------------------------------------------------

 #include "ngraph/runtime/cpu/cpu_backend.hpp"
+#include "ngraph/runtime/cpu/tensor_view.hpp"
 #include "ngraph/runtime/external_function.hpp"

-using namespace ngraph::runtime::cpu;
+using namespace ngraph;
+using namespace std;

-std::shared_ptr<ngraph::runtime::CallFrame>
-    CPUBackend::make_call_frame(const std::shared_ptr<ExternalFunction>& external_function)
+std::shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPUBackend::make_call_frame(
+    const std::shared_ptr<ExternalFunction>& external_function)
 {
     return external_function->make_call_frame();
 }
+
+std::shared_ptr<ngraph::runtime::TensorView>
+    runtime::cpu::CPUBackend::make_primary_tensor_view(const ngraph::element::Type& element_type,
+                                                       const Shape& shape)
+{
+    auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
+    return dynamic_pointer_cast<runtime::TensorView>(rc);
+}
@@ -22,11 +22,18 @@ namespace ngraph
     {
         namespace cpu
         {
-            class CPUBackend : public Backend
+            static size_t alignment = 64;
+
+            class CPUBackend : public runtime::Backend
             {
             public:
-                virtual std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
-                    const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function);
+                std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
+                    const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function)
+                    override;
+
+                std::shared_ptr<ngraph::runtime::TensorView>
+                    make_primary_tensor_view(const ngraph::element::Type& element_type,
+                                             const Shape& shape) override;
             };
         }
     }
......
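make_primary_tensor_view gives callers backend-owned, 64-byte-aligned storage. A hedged end-to-end sketch, assuming the usual ngraph headers are included:

```cpp
auto backend = make_shared<runtime::cpu::CPUBackend>();
auto tv = backend->make_primary_tensor_view(element::Float32::element_type(), Shape{4});

vector<float> in{1, 2, 3, 4};
tv->write(in.data(), 0, in.size() * sizeof(float)); // copy into the aligned buffer

vector<float> out = tv->get_vector<float>(); // read back via the helper added below
```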
@@ -37,25 +37,24 @@ namespace ngraph
             using DynamicStrides = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
             using VectorStrides = Eigen::Stride<Eigen::Dynamic, 1>;

-            template <typename ET>
-            using DynamicArray =
-                Eigen::Array<typename ET::type, Eigen::Dynamic, Eigen::Dynamic>;
+            template <typename T>
+            using DynamicArray = Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>;

-            template <typename ET>
-            using EigenArrayBase = Eigen::Map<DynamicArray<ET>, 0, DynamicStrides>;
+            template <typename T>
+            using EigenArrayBase = Eigen::Map<DynamicArray<T>, 0, DynamicStrides>;

-            template <typename ET>
-            using DynamicMatrix = Eigen::
-                Matrix<typename ET::type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+            template <typename T>
+            using DynamicMatrix =
+                Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

-            template <typename ET>
-            using EigenMatrixBase = Eigen::Map<DynamicMatrix<ET>, 0, DynamicStrides>;
+            template <typename T>
+            using EigenMatrixBase = Eigen::Map<DynamicMatrix<T>, 0, DynamicStrides>;

-            template <typename ET>
-            using DynamicVector = Eigen::Matrix<typename ET::type, Eigen::Dynamic, 1>;
+            template <typename T>
+            using DynamicVector = Eigen::Matrix<T, Eigen::Dynamic, 1>;

-            template <typename ET>
-            using EigenVectorBase = Eigen::Map<DynamicVector<ET>, 0, VectorStrides>;
+            template <typename T>
+            using EigenVectorBase = Eigen::Map<DynamicVector<T>, 0, VectorStrides>;

             namespace fmt
             {
@@ -114,10 +113,10 @@ namespace ngraph
             };
             }

-            // ET element type
+            // T element type
             // FMT array format (fmt::V for vector, etc.)
             // BASE select array/matrix
-            template <typename ET,
+            template <typename T,
                       typename FMT,
                       typename BASE,
                       typename STRIDES = DynamicStrides>
@@ -126,26 +125,19 @@ namespace ngraph
                 using base = BASE;

             public:
-                EigenWrapper(typename ET::type* t, const FMT& fmt)
+                EigenWrapper(T* t, const FMT& fmt)
                     : base(t, fmt.l0, fmt.l1, STRIDES(fmt.s0, fmt.s1))
                 {
                 }

                 EigenWrapper(
-                    typename ET::type* t,
+                    T* t,
                     const std::shared_ptr<ngraph::descriptor::layout::DenseTensorViewLayout>&
                         layout)
                     : base(t, layout->get_size(), 1, DynamicStrides(1, 1))
                 {
                 }

-                EigenWrapper(CallFrame* call_frame, const TensorViewInfo& tensor_view_info)
-                    : EigenWrapper(
-                          call_frame->get_tensor_view_data<ET>(tensor_view_info.get_index()),
-                          FMT(tensor_view_info))
-                {
-                }
-
                 template <typename U>
                 EigenWrapper& operator=(const U& other)
                 {
@@ -154,17 +146,17 @@ namespace ngraph
                 }
             };

-            template <typename ET, typename FMT = fmt::V>
-            using EigenArray1d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
+            template <typename T, typename FMT = fmt::V>
+            using EigenArray1d = EigenWrapper<T, FMT, EigenArrayBase<T>>;

-            template <typename ET, typename FMT = fmt::M>
-            using EigenArray2d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
+            template <typename T, typename FMT = fmt::M>
+            using EigenArray2d = EigenWrapper<T, FMT, EigenArrayBase<T>>;

-            template <typename ET, typename FMT = fmt::M>
-            using EigenMatrix = EigenWrapper<ET, FMT, EigenMatrixBase<ET>>;
+            template <typename T, typename FMT = fmt::M>
+            using EigenMatrix = EigenWrapper<T, FMT, EigenMatrixBase<T>>;

-            template <typename ET, typename FMT = fmt::V>
-            using EigenVector = EigenWrapper<ET, FMT, EigenVectorBase<ET>, VectorStrides>;
+            template <typename T, typename FMT = fmt::V>
+            using EigenVector = EigenWrapper<T, FMT, EigenVectorBase<T>, VectorStrides>;
         }
     }
 }
......
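The switch from ET (an element-type wrapper carrying a nested ::type) to a plain T means the aliases now take the C++ scalar type directly. A standalone sketch of what the aliases expand to, using raw Eigen (assumes Eigen3 on the include path):

```cpp
#include <iostream>
#include <Eigen/Dense>

int main()
{
    using DynamicArray = Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic>;
    using DynamicStrides = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;

    float buffer[6] = {1, 2, 3, 4, 5, 6};

    // EigenArrayBase<float> is Eigen::Map<DynamicArray, 0, DynamicStrides>:
    // a non-owning 2x3 view over a raw backend buffer.
    Eigen::Map<DynamicArray, 0, DynamicStrides> view(
        buffer, 2, 3, DynamicStrides(3, 1));

    view *= 2.0f;                   // writes through to 'buffer'
    std::cout << buffer[5] << "\n"; // 12
    return 0;
}
```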
This source diff could not be displayed because it is too large. You can view the blob instead.
@@ -17,6 +17,7 @@
 #include <string>
 #include <vector>

+#include "ngraph/codegen/code_writer.hpp"
 #include "ngraph/node.hpp"
 #include "ngraph/runtime/cpu/external_function.hpp"
 #include "ngraph/runtime/tensor_view_info.hpp"
@@ -24,7 +25,6 @@
 #define EMITTER_DECL(E)                                       \
     E(const ngraph::Node* n,                                  \
       ExternalFunction* ef,                                   \
-      FunctionMap& function_map,                              \
       const std::vector<TensorViewInfo>& inputs,              \
       const std::vector<TensorViewInfo>& outputs)
@@ -37,14 +37,15 @@ namespace ngraph
             class Emitter
             {
             protected:
-                std::string TU;
+                codegen::CodeWriter TU;

             public:
                 Emitter()
-                    : TU("")
+                    : TU()
                 {
                 }
-                std::string& GetTU() { return TU; }
+                std::string get_code() { return TU.get_code(); }
+                codegen::CodeWriter& get_code_writer() { return TU; }
                 void EMITTER_DECL(EmitNop);
                 void EMITTER_DECL(EmitAdd);
                 void EMITTER_DECL(EmitDot);
......
@@ -20,10 +20,12 @@
 #include <typeinfo>
 #include <unordered_map>

+#include "ngraph/codegen/code_writer.hpp"
 #include "ngraph/codegen/compiler.hpp"
 #include "ngraph/descriptor/input.hpp"
 #include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
 #include "ngraph/descriptor/output.hpp"
+#include "ngraph/file_util.hpp"
 #include "ngraph/function.hpp"
 #include "ngraph/node.hpp"
 #include "ngraph/ops/abs.hpp"
@@ -66,7 +68,10 @@
 #include "ngraph/ops/tanh.hpp"
 #include "ngraph/ops/tuple.hpp"
 #include "ngraph/pass/assign_layout.hpp"
+#include "ngraph/pass/dump_sorted.hpp"
+#include "ngraph/pass/liveness.hpp"
 #include "ngraph/pass/manager.hpp"
+#include "ngraph/pass/memory_layout.hpp"
 #include "ngraph/pass/topological_sort.hpp"
 #include "ngraph/runtime/cpu/call_frame.hpp"
 #include "ngraph/runtime/cpu/emitter.hpp"
@@ -78,6 +83,25 @@ using namespace ngraph::runtime::cpu;

 using ngraph::descriptor::layout::DenseTensorViewLayout;

+extern "C" void
+    allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr)
+{
+    size_t allocation_size = size + alignment;
+    *allocated = static_cast<char*>(malloc(allocation_size));
+    *aligned_ptr = *allocated;
+    size_t mod = size_t(*aligned_ptr) % alignment;
+    if (mod != 0)
+    {
+        (*aligned_ptr) += (alignment - mod);
+    }
+}
+
+extern "C" void free_aligned_buffer(void* allocated)
+{
+    free(allocated);
+}
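These helpers over-allocate by `alignment` bytes and bump the returned pointer to the next boundary; the generated code calls them to set up its temporary pool. A quick hedged usage sketch:

```cpp
// Sketch: allocate a 1000-byte pool aligned to 64 bytes.
char* allocated = nullptr;
char* aligned = nullptr;
allocate_aligned_buffer(1000, 64, &allocated, &aligned);
assert(reinterpret_cast<size_t>(aligned) % 64 == 0);
// ... carve tensors out of 'aligned' at their pool offsets ...
free_aligned_buffer(allocated); // free via the original pointer, not 'aligned'
```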
 #define TI(x) type_index(typeid(x))

 static const OpMap dispatcher{
@@ -139,16 +163,14 @@ static const OpMap dispatcher{
     {TI(ngraph::op::Atan), &Emitter::EmitAtan},
 };

-#undef TI
 ExternalFunction::ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
                                    bool release_function)
     : ngraph::runtime::ExternalFunction(function, release_function)
-    , compiled_function(nullptr)
+    , m_compiled_function(nullptr)
 {
 }
-void ExternalFunction::compile(FunctionMap& function_map)
+void ExternalFunction::compile()
 {
     if (m_is_compiled)
     {
@@ -159,51 +181,17 @@ void ExternalFunction::compile()
     pass_manager.register_pass<pass::TopologicalSort>();
     // For now, just make everyone row-major.
     pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
+    pass_manager.register_pass<pass::Liveness>();
+    pass_manager.register_pass<pass::MemoryLayout>(64);
+    pass_manager.register_pass<pass::DumpSorted>("sorted_ops.txt");
     pass_manager.run_passes(m_function);
-    // Determine tensor requirements for the call frame
-    unordered_map<shared_ptr<ngraph::descriptor::TensorView>, size_t> tensor_index;
-
-    // First come the function outputs
-    for (const descriptor::Output& output : m_function->get_result()->get_outputs())
-    {
-        auto tv = output.get_tensor_view();
-        size_t index = tensor_index.size();
-        tensor_index[tv] = index;
-    }
-    m_n_outputs = tensor_index.size();
-
-    // Next are the function inputs
-    for (auto param : m_function->get_parameters())
-    {
-        for (const descriptor::Output& output : param->get_outputs())
-        {
-            auto tv = output.get_tensor_view();
-            size_t index = tensor_index.size();
-            tensor_index[tv] = index;
-        }
-    }
-    m_n_inputs = tensor_index.size() - m_n_outputs;
-
-    // All remaining tensor views
-    for (shared_ptr<Node> node : m_function->get_ordered_ops())
-    {
-        for (const descriptor::Output& output : node->get_outputs())
-        {
-            auto tv = output.get_tensor_view();
-            if (0 == tensor_index.count(tv))
-            {
-                size_t index = tensor_index.size();
-                tensor_index[tv] = index;
-                m_temp_views.push_back(tv);
-            }
-        }
-    }
     // Now we build the TU
     Emitter emitter;
-    auto& TU = emitter.GetTU();
+    codegen::CodeWriter& TU = emitter.get_code_writer();

-    TU += R"(// Generated by the NGraph CPU backend
+    TU +=
+        R"(// Generated by the NGraph CPU backend
 #include <algorithm>
 #include <cmath>
 #include <memory>
@@ -221,44 +209,130 @@
 using namespace ngraph::element;
 using namespace ngraph::runtime;
 using namespace ngraph::runtime::cpu::eigen;

-extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
-                             ngraph::runtime::TensorViewPtrs& tensor_views,
-                             const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
-{
+extern "C" void allocate_aligned_buffer(
+    size_t size,
+    size_t alignment,
+    char** allocated,
+    char** aligned_ptr);
+
+extern "C" void free_aligned_buffer(void* allocated);
 )";
-    for (shared_ptr<Node> node : m_function->get_ordered_ops())
+    TU << "// Declare any functions that are not main\n";
+    for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
+    {
+        if (f != m_function)
+        {
+            TU << "extern \"C\" void " << f->get_name() << "(\n";
+            TU << "    const std::vector<void*>& inputs,\n";
+            TU << "    const std::vector<void*>& outputs);\n";
+        }
+    }
+    TU << "\n";
+
+    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
     {
-        auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
-                         // with shared pointers, which is fine here but clang doesn't like it.)
-        auto handler = dispatcher.find(type_index(typeid(n)));
-        if (handler == dispatcher.end())
+        TU << "extern \"C\" void " << current_function->get_name() << "(\n";
+        TU << "    const std::vector<void*>& inputs,\n";
+        TU << "    const std::vector<void*>& outputs)\n";
+        TU << "{\n";
+        TU.indent++;
+
+        TU << "// Allocate the memory pool\n";
+        size_t temp_pool_size = pass_manager.get_state().get_temporary_pool_size();
+        TU << "char* allocated_buffer_pool;\n";
+        TU << "char* aligned_buffer_pool;\n";
+        TU << "allocate_aligned_buffer(" << temp_pool_size << ", 64"
+           << ", &allocated_buffer_pool, &aligned_buffer_pool);\n";
+        TU << "\n";
+
+        TU << "// Define temporary tensors\n";
+        for (shared_ptr<Node> node : current_function->get_ordered_ops())
         {
-            throw ngraph_error("Unhandled op during code generation : " + node->description());
+            for (descriptor::Tensor* tensor : node->liveness_new_list)
+            {
+                TU << tensor->get_element_type() << "* " << tensor->get_name() << " = ("
+                   << tensor->get_element_type() << "*)(aligned_buffer_pool + "
+                   << tensor->get_pool_offset() << ");\n";
+            }
         }
-        std::vector<TensorViewInfo> in;
-        for (const descriptor::Input& input : node->get_inputs())
+        TU << "\n";
+
+        TU << "// Define inputs\n";
+        size_t arg_index = 0;
+        for (shared_ptr<op::Parameter> param : current_function->get_parameters())
         {
-            const descriptor::Output& output = input.get_output();
-            auto tv = output.get_tensor_view();
-            in.push_back({tensor_index.at(tv), tv});
+            for (const descriptor::Output& output : param->get_outputs())
+            {
+                shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
+                const element::Type& et = tv->get_tensor_view_type()->get_element_type();
+                string type = et.c_type_string();
+                TU << "" << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
+                   << "*>(inputs[" << arg_index << "]);\n";
+                arg_index++;
+            }
         }
-        std::vector<TensorViewInfo> out;
-        for (const descriptor::Output& output : node->get_outputs())
+        TU << "\n";
+
+        TU << "// Define outputs\n";
+        size_t output_index = 0;
+        for (const descriptor::Output& output : current_function->get_result()->get_outputs())
         {
-            auto tv = output.get_tensor_view();
-            out.push_back({tensor_index.at(tv), tv});
+            shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
+            const element::Type& et = tv->get_tensor_view_type()->get_element_type();
+            string type = et.c_type_string();
+            TU << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
+               << "*>(outputs[" << output_index << "]);\n";
+            output_index++;
        }
-        handler->second(&emitter, node.get(), this, function_map, in, out);
-    }
+        TU << "\n";
+
+        TU << "// Define tensor views\n";
+        TU << "\n";
+
+        for (shared_ptr<Node> node : current_function->get_ordered_ops())
+        {
+            auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
+                             // with shared pointers, which is fine here but clang doesn't like it.)
+            auto handler = dispatcher.find(type_index(typeid(n)));
+            if (handler == dispatcher.end())
+            {
+                throw ngraph_error("Unhandled op during code generation : " + node->description());
+            }
+            std::vector<TensorViewInfo> in;
+            for (const descriptor::Input& input : node->get_inputs())
+            {
+                const descriptor::Output& output = input.get_output();
+                auto tv = output.get_tensor_view();
+                in.push_back({0, tv});
+            }
+            std::vector<TensorViewInfo> out;
+            for (const descriptor::Output& output : node->get_outputs())
+            {
+                auto tv = output.get_tensor_view();
+                out.push_back({0, tv});
+            }
+            handler->second(&emitter, node.get(), this, in, out);
+        }
+
+        TU << "\nfree_aligned_buffer(allocated_buffer_pool);\n";

-    // End TU
-    TU += "}\n";
+        TU.indent--;
+
+        // End TU
+        TU += "}\n\n";
+    }
     // TODO: Cleanup and make this a utility function
-    ofstream out("__ngcpu_codegen.cpp");
-    out << TU;
+    string output_dir = "cpu_codegen";
+    string function_name = m_function->get_name();
+    file_util::make_directory(output_dir);
+    string filename = file_util::path_join(output_dir, function_name + "_codegen.cpp");
+    ofstream out(filename);
+    string code = TU.get_code();
+    out << code;
     out.close();

     ngraph::codegen::execution_state estate;
@@ -271,15 +345,15 @@
     estate.set_debuginfo_enabled(true);
 #endif

-    auto llvm_module = estate.compile(TU, "__ngcpu_codegen.cpp");
-    assert(llvm_module);
+    auto llvm_module = estate.compile(code, function_name + "_codegen.cpp");
+    if (llvm_module == nullptr)
+    {
+        throw runtime_error("function failed to compile");
+    }
     estate.add_module(llvm_module);
     estate.finalize();
-    compiled_function =
-        estate.find_function<void(ngraph::runtime::cpu::CallFrame*,
-                                  ngraph::runtime::TensorViewPtrs&,
-                                  const std::vector<std::shared_ptr<CallFrame>>&)>("__entrypoint");
-    assert(compiled_function);
+    m_compiled_function = estate.find_function<EntryPoint_t>(function_name);
+    assert(m_compiled_function);

     m_is_compiled = true;
     if (m_release_function)
@@ -288,73 +362,12 @@
     }
 }
-// Suppress Clang's complaints about the ,##__VA_ARGS__ token-pasting hack, which is a GNU extension
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
-
-#define DO_ON_ELEMENT_TYPE(et, err_msg, macro, ...)               \
-    {                                                             \
-        if (et == element::Bool::element_type())                  \
-        {                                                         \
-            macro(element::Bool, ##__VA_ARGS__);                  \
-        }                                                         \
-        else if (et == element::Float32::element_type())          \
-        {                                                         \
-            macro(element::Float32, ##__VA_ARGS__);               \
-        }                                                         \
-        else if (et == element::Int8::element_type())             \
-        {                                                         \
-            macro(element::Int8, ##__VA_ARGS__);                  \
-        }                                                         \
-        else if (et == element::Int32::element_type())            \
-        {                                                         \
-            macro(element::Int32, ##__VA_ARGS__);                 \
-        }                                                         \
-        else if (et == element::Int64::element_type())            \
-        {                                                         \
-            macro(element::Int64, ##__VA_ARGS__);                 \
-        }                                                         \
-        else if (et == element::UInt8::element_type())            \
-        {                                                         \
-            macro(element::UInt8, ##__VA_ARGS__);                 \
-        }                                                         \
-        else if (et == element::UInt32::element_type())           \
-        {                                                         \
-            macro(element::UInt32, ##__VA_ARGS__);                \
-        }                                                         \
-        else if (et == element::UInt64::element_type())           \
-        {                                                         \
-            macro(element::UInt64, ##__VA_ARGS__);                \
-        }                                                         \
-        else                                                      \
-        {                                                         \
-            throw ngraph_error(err_msg);                          \
-        }                                                         \
-    }
-
-// Turn off complaint suppression (see above)
-#pragma clang diagnostic pop
 shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
 {
-    FunctionMap function_map;
-
     if (!m_is_compiled)
     {
-        compile(function_map);
+        compile();
     }
-
-    std::vector<std::shared_ptr<ngraph::runtime::TensorView>> temps;
-    for (auto tv : m_temp_views)
-    {
-        auto& et = tv->get_tensor_view_type()->get_element_type();
-        auto shape = tv->get_tensor_view_type()->get_shape();
-
-#define M(T) temps.push_back(ngraph::runtime::make_tensor<T>(shape));
-        DO_ON_ELEMENT_TYPE(
-            et, "Internal error: tried to create temporary for unhandled element type", M);
-#undef M
-    }
-    return make_shared<ngraph::runtime::cpu::CallFrame>(
-        compiled_function, m_n_outputs, m_n_inputs, temps, callees);
+    return make_shared<ngraph::runtime::cpu::CallFrame>(m_compiled_function);
 }
@@ -22,6 +22,7 @@
 #include "ngraph/codegen/compiler.hpp"
 #include "ngraph/function.hpp"
+#include "ngraph/runtime/cpu/call_frame.hpp"
 #include "ngraph/runtime/external_function.hpp"
 #include "ngraph/runtime/tensor_view_info.hpp"
@@ -41,32 +42,22 @@ namespace ngraph
             using OpFunction = std::function<void(Emitter*,
                                                   const ngraph::Node*,
                                                   ExternalFunction*,
-                                                  FunctionMap&,
                                                   const std::vector<TensorViewInfo>& inputs,
                                                   const std::vector<TensorViewInfo>& outputs)>;

             using OpMap = std::unordered_map<std::type_index, OpFunction>;

-            using EntryPoint = std::function<void(
-                ngraph::runtime::cpu::CallFrame*,
-                ngraph::runtime::TensorViewPtrs&,
-                const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>&)>;
-
             class ExternalFunction : public ngraph::runtime::ExternalFunction
             {
             public:
                 ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
                                  bool release_function = true);
                 std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame();
-                std::vector<std::shared_ptr<CallFrame>>& get_callees() { return callees; }

             protected:
-                void compile(FunctionMap& function_map);
+                void compile();

-                size_t m_n_inputs;
-                size_t m_n_outputs;
-                ngraph::descriptor::TensorViewPtrs m_temp_views;
-                EntryPoint compiled_function;
-                std::vector<std::shared_ptr<CallFrame>> callees;
+                EntryPoint m_compiled_function;
             };
         }
     }
......
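Putting the pieces together, the caller-facing flow is unchanged, but under the hood make_call_frame() now compiles one C function per ngraph Function and binds it by name. A hedged sketch (assumes `f` is a built ngraph::Function and the tensor views come from CPUBackend::make_primary_tensor_view as shown earlier):

```cpp
auto external = make_shared<runtime::cpu::ExternalFunction>(f);
shared_ptr<runtime::CallFrame> cf = external->make_call_frame(); // triggers codegen + JIT

// a, b, result are shared_ptr<runtime::TensorView> values.
(*cf)({a, b}, {result});
```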
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <memory>
#include "cpu_backend.hpp"
#include "tensor_view.hpp"
using namespace ngraph;
using namespace std;
extern "C" void
allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), "external", true, true))
{
m_descriptor->set_tensor_view_layout(
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
allocate_aligned_buffer(m_buffer_size, runtime::cpu::alignment, &m_allocated, &m_buffer);
}
runtime::cpu::CPUTensorView::~CPUTensorView()
{
free_aligned_buffer(m_allocated);
}
char* runtime::cpu::CPUTensorView::get_data_ptr()
{
return m_buffer;
}
const char* runtime::cpu::CPUTensorView::get_data_ptr() const
{
return m_buffer;
}
void runtime::cpu::CPUTensorView::write(const void* source, size_t tensor_offset, size_t n)
{
if (tensor_offset + n > m_buffer_size)
{
throw out_of_range("write access past end of tensor");
}
char* target = get_data_ptr();
memcpy(&target[tensor_offset], source, n);
}
void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_t n) const
{
if (tensor_offset + n > m_buffer_size)
{
throw out_of_range("read access past end of tensor");
}
const char* source = get_data_ptr();
memcpy(target, &source[tensor_offset], n);
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include <memory>
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPUTensorView;
}
}
}
class ngraph::runtime::cpu::CPUTensorView : public ngraph::runtime::TensorView
{
public:
CPUTensorView(const ngraph::element::Type& element_type, const Shape& shape);
virtual ~CPUTensorView();
char* get_data_ptr();
const char* get_data_ptr() const;
/// @brief Write bytes directly into the tensor
/// @param p Pointer to source of data
/// @param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
/// @param n Number of bytes to write, must be integral number of elements.
void write(const void* p, size_t tensor_offset, size_t n) override;
/// @brief Read bytes directly from the tensor
/// @param p Pointer to destination for data
/// @param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
/// @param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t tensor_offset, size_t n) const override;
private:
char* m_allocated;
char* m_buffer;
size_t m_buffer_size;
};
@@ -179,6 +179,8 @@ namespace ngraph
             const vtype get_vector() const { return m_elements; }
             operator const vtype() const { return m_elements; }
             operator vtype() { return m_elements; }
+            void* data() { return m_elements.data(); }
+            const void* data() const { return m_elements.data(); }
             bool operator==(const NDArrayBase<T>& other) const
             {
                 return m_shape == other.m_shape && m_elements == other.m_elements;
......
@@ -18,8 +18,11 @@
 #include <vector>

 #include "ngraph/descriptor/tensor_view.hpp"
+#include "ngraph/log.hpp"
+#include "ngraph/runtime/ndarray.hpp"
 #include "ngraph/runtime/value.hpp"
 #include "ngraph/shape.hpp"
+#include "ngraph/util.hpp"

 namespace ngraph
 {
@@ -42,7 +45,6 @@ namespace ngraph
         }

     public:
-        TensorView() {}
         virtual ~TensorView() {}

         template <typename ET>
         ParameterizedTensorView<ET>* get_parameterized_tensor_view()
@@ -75,6 +77,29 @@ namespace ngraph
         /// @param n Number of bytes to read, must be integral number of elements.
         virtual void read(void* p, size_t tensor_offset, size_t n) const = 0;

+        // This is for unit test only
+        template <typename T>
+        bool operator==(const NDArrayBase<T>& ndarray) const
+        {
+            bool rc = false;
+            if (get_shape() == ndarray.get_shape())
+            {
+                std::vector<T> lhs(ndarray.get_vector().size());
+                read(lhs.data(), 0, ndarray.get_vector().size() * sizeof(T));
+                rc = (lhs == ndarray.get_vector());
+            }
+            return rc;
+        }
+
+        template <typename T>
+        std::vector<T> get_vector()
+        {
+            size_t element_count = shape_size(get_shape());
+            size_t size = element_count * sizeof(T);
+            std::vector<T> rc(element_count);
+            read(rc.data(), 0, size);
+            return rc;
+        }
+
     protected:
         std::shared_ptr<ngraph::descriptor::TensorView> m_descriptor;
     };
......
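The new helpers make test assertions concise: a tensor view can be dumped to a std::vector or compared directly against an NDArray. A hedged sketch of test-style usage (the NDArray construction syntax is an assumption based on the comparison operator above):

```cpp
// Sketch: 'result' is a shared_ptr<runtime::TensorView> filled by a call frame.
vector<float> actual = result->get_vector<float>();

// Or compare against an ndarray literal (shapes must match):
// EXPECT_TRUE(*result == runtime::NDArray<float, 1>({2, 4, 6}));
```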
@@ -27,9 +27,10 @@ namespace ngraph
     {
     public:
         TensorViewInfo(size_t index,
-                       const std::shared_ptr<const ngraph::descriptor::TensorView>& descriptor)
+                       std::shared_ptr<const ngraph::descriptor::TensorView> descriptor)
             : m_index(index)
             , m_layout(descriptor->get_tensor_view_layout())
+            , m_tensor_view(descriptor)
         {
         }
@@ -46,9 +47,20 @@ namespace ngraph
             return std::static_pointer_cast<LT>(m_layout);
         }

+        std::shared_ptr<const ngraph::descriptor::TensorView> get_tensor_view() const
+        {
+            return m_tensor_view;
+        }
+
+        const ngraph::descriptor::Tensor& get_tensor() const
+        {
+            return m_tensor_view->get_tensor();
+        }
+
     protected:
         size_t m_index;
         std::shared_ptr<ngraph::descriptor::layout::TensorViewLayout> m_layout;
+        std::shared_ptr<const ngraph::descriptor::TensorView> m_tensor_view;
     };
 }
 }
@@ -26,6 +26,7 @@ set (SRC
     copy.cpp
     build_graph.cpp
     eigen.cpp
+    file_util.cpp
    input_output_assign.cpp
    main.cpp
    op.cpp
......
This source diff could not be displayed because it is too large. You can view the blob instead.
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <random>
#include <sstream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "ngraph/file_util.hpp"
using namespace std;
using namespace ngraph;
TEST(file_util, path_join)
{
{
string s1 = "";
string s2 = "";
EXPECT_STREQ("", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "";
string s2 = "/test1/test2";
EXPECT_STREQ("/test1/test2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "";
string s2 = "/test1/test2/";
EXPECT_STREQ("/test1/test2/", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "";
string s2 = "test1/test2";
EXPECT_STREQ("test1/test2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/x1/x2";
string s2 = "";
EXPECT_STREQ("/x1/x2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/x1/x2/";
string s2 = "/";
EXPECT_STREQ("/", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/x1/x2";
string s2 = "/test1/test2";
EXPECT_STREQ("/test1/test2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/x1/x2/";
string s2 = "test1/test2";
EXPECT_STREQ("/x1/x2/test1/test2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/x1/x2";
string s2 = "test1/test2";
EXPECT_STREQ("/x1/x2/test1/test2", file_util::path_join(s1, s2).c_str());
}
{
string s1 = "/";
string s2 = "test1/test2";
EXPECT_STREQ("/test1/test2", file_util::path_join(s1, s2).c_str());
}
}
TEST(file_util, get_temp_directory)
{
string tmp = file_util::get_temp_directory();
EXPECT_NE(0, tmp.size());
}
@@ -113,3 +113,24 @@ TEST(tensor, read_write)
     test_read_write<element::Float32>({1.0, 3.0, 5.0});
     test_read_write<element::Int64>({-1, 2, 4});
 }
+
+TEST(tensor, output_flag)
+{
+    pass::Manager pass_manager;
+    pass_manager.register_pass<pass::TopologicalSort>();
+    pass_manager.register_pass<pass::Liveness>();
+
+    auto arg0 = make_shared<op::Parameter>(element::Float32::element_type(), Shape{1});
+    auto add = make_shared<op::Add>(arg0, arg0);
+    auto rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{1});
+    auto f0 = make_shared<Function>(add, rt, op::Parameters{arg0});
+
+    pass_manager.run_passes(f0);
+
+    EXPECT_TRUE(f0->get_result()->is_output());
+    for (descriptor::Output& output : f0->get_result()->get_outputs())
+    {
+        const Tensor& t = output.get_tensor();
+        EXPECT_TRUE(t.is_output());
+    }
+}
@@ -21,7 +21,6 @@
 #include "ngraph/log.hpp"
 #include "ngraph/ngraph.hpp"
-#include "ngraph/pass/collect_functions.hpp"
 #include "ngraph/pass/dump_sorted.hpp"
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/topological_sort.hpp"
@@ -172,11 +171,11 @@ TEST(topological_sort, collect_functions)
         "h");

     pass::Manager pass_manager;
-    pass_manager.register_pass<pass::CollectFunctions>();
     pass_manager.run_passes(h);

     set<string> expected = {"f", "g", "h"};
     auto functions = pass_manager.get_state().get_functions();
     vector<string> fnames;
     for (shared_ptr<Function> func : functions)
     {
......