Unverified Commit 33840ab0 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Merge branch 'master' into drwebb/cudnn_hello_world

parents 8af5a886 1d919bfc
......@@ -43,6 +43,9 @@ endif()
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
# Create compilation database compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# set directory where the custom finders live
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
......
......@@ -11,5 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory( ngraph )
......@@ -45,6 +45,7 @@ set (SRC
ops/negative.cpp
ops/op.cpp
ops/parameter.cpp
ops/power.cpp
ops/reduce.cpp
ops/reshape.cpp
ops/select.cpp
......
......@@ -82,7 +82,7 @@ void Compiler::set_precompiled_header_source(const std::string& source)
std::unique_ptr<llvm::Module> Compiler::compile(const std::string& source)
{
lock_guard<mutex> lock(m_mutex);
return s_static_compiler.compile(source);
return s_static_compiler.compile(compiler_action, source);
}
static std::string GetExecutablePath(const char* Argv0)
......@@ -112,11 +112,10 @@ StaticCompiler::StaticCompiler()
args.push_back(m_source_name.c_str());
// Prepare DiagnosticEngine
DiagnosticOptions DiagOpts;
TextDiagnosticPrinter* textDiagPrinter = new clang::TextDiagnosticPrinter(errs(), &DiagOpts);
IntrusiveRefCntPtr<clang::DiagnosticIDs> pDiagIDs;
DiagnosticsEngine* pDiagnosticsEngine =
new DiagnosticsEngine(pDiagIDs, &DiagOpts, textDiagPrinter);
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
TextDiagnosticPrinter* textDiagPrinter = new clang::TextDiagnosticPrinter(errs(), &*DiagOpts);
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
DiagnosticsEngine DiagEngine(DiagID, &*DiagOpts, textDiagPrinter);
// Create and initialize CompilerInstance
m_compiler = std::unique_ptr<CompilerInstance>(new CompilerInstance());
......@@ -124,7 +123,7 @@ StaticCompiler::StaticCompiler()
// Initialize CompilerInvocation
CompilerInvocation::CreateFromArgs(
m_compiler->getInvocation(), &args[0], &args[0] + args.size(), *pDiagnosticsEngine);
m_compiler->getInvocation(), &args[0], &args[0] + args.size(), DiagEngine);
// Infer the builtin include path if unspecified.
if (m_compiler->getHeaderSearchOpts().UseBuiltinIncludes &&
......@@ -209,7 +208,7 @@ StaticCompiler::StaticCompiler()
CGO.OmitLeafFramePointer = 1;
CGO.VectorizeLoop = 1;
CGO.VectorizeSLP = 1;
CGO.CXAAtExit = 0;
CGO.CXAAtExit = 1;
if (m_debuginfo_enabled)
{
......@@ -305,7 +304,9 @@ void StaticCompiler::use_cached_files()
}
}
std::unique_ptr<llvm::Module> StaticCompiler::compile(const string& source)
std::unique_ptr<llvm::Module>
StaticCompiler::compile(std::unique_ptr<clang::CodeGenAction>& compiler_action,
const string& source)
{
if (!m_precompiled_header_valid && m_precomiled_header_source.empty() == false)
{
......@@ -325,11 +326,11 @@ std::unique_ptr<llvm::Module> StaticCompiler::compile(const string& source)
m_compiler->getInvocation().getPreprocessorOpts().addRemappedFile(m_source_name, buffer.get());
// Create and execute action
CodeGenAction* compilerAction = new EmitCodeGenOnlyAction();
compiler_action.reset(new EmitCodeGenOnlyAction());
std::unique_ptr<llvm::Module> rc;
if (m_compiler->ExecuteAction(*compilerAction) == true)
if (m_compiler->ExecuteAction(*compiler_action) == true)
{
rc = compilerAction->takeModule();
rc = compiler_action->takeModule();
}
buffer.release();
......@@ -359,4 +360,5 @@ void StaticCompiler::generate_pch(const string& source)
buffer.release();
m_compiler->getInvocation().getPreprocessorOpts().clearRemappedFiles();
delete compilerAction;
}
......@@ -18,6 +18,8 @@
#include <memory>
#include <string>
#include <clang/CodeGen/CodeGenAction.h>
#include <llvm/ExecutionEngine/MCJIT.h> // forces JIT to link in
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/Option/Arg.h>
......@@ -53,8 +55,9 @@ public:
~Compiler();
void set_precompiled_header_source(const std::string& source);
std::unique_ptr<llvm::Module> compile(const std::string& source);
std::unique_ptr<clang::CodeGenAction>& get_compiler_action() { return compiler_action; }
private:
std::unique_ptr<clang::CodeGenAction> compiler_action;
};
class ngraph::codegen::StaticCompiler : public llvm::SectionMemoryManager
......@@ -70,7 +73,9 @@ public:
m_precomiled_header_source = source;
}
void add_header_search_path(const std::string& path);
std::unique_ptr<llvm::Module> compile(const std::string& source);
std::unique_ptr<llvm::Module> compile(std::unique_ptr<clang::CodeGenAction>& compiler_action,
const std::string& source);
void generate_pch(const std::string& source);
private:
......
......@@ -34,11 +34,11 @@ bool codegen::ExecutionEngine::add_module(std::unique_ptr<llvm::Module>& module)
{
if (!m_execution_engine)
{
m_execution_engine = llvm::EngineBuilder(move(module))
.setEngineKind(llvm::EngineKind::JIT)
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setErrorStr(&m_jit_error)
.create();
m_execution_engine.reset(llvm::EngineBuilder(move(module))
.setEngineKind(llvm::EngineKind::JIT)
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setErrorStr(&m_jit_error)
.create());
if (!m_execution_engine)
{
......
......@@ -46,7 +46,7 @@ public:
}
private:
llvm::ExecutionEngine* m_execution_engine;
std::unique_ptr<llvm::ExecutionEngine> m_execution_engine;
std::string m_jit_error;
template <typename signature>
......
......@@ -69,6 +69,18 @@ size_t Tensor::get_pool_offset() const
std::ostream& operator<<(std::ostream& out, const Tensor& tensor)
{
out << "Tensor(" << tensor.get_name() << ")";
out << "Tensor(" << tensor.get_name() << ", ";
out << (tensor.is_persistent() ? "P" : "");
out << (tensor.is_constant() ? "C" : "");
out << (tensor.is_input() ? "I" : "");
out << (tensor.is_output() ? "O" : "");
if (!tensor.is_persistent() && !tensor.is_constant() && !tensor.is_input() &&
!tensor.is_output())
{
out << "T";
}
out << ")";
return out;
}
......@@ -31,9 +31,9 @@ Function::Function(const std::shared_ptr<Node>& result,
, m_name(name)
, m_result_type(result_type)
, m_ordered_ops_valid(false)
, m_temporary_pool_size(0)
, m_instance_id(m_next_instance_id.fetch_add(1))
{
m_result->set_is_output();
traverse_nodes(this, [&](shared_ptr<Node> node) { m_ops.push_back(node); });
}
......@@ -97,6 +97,16 @@ void Function::set_name(const string& name)
}
}
// Size of the temporary-value memory pool computed for this function by the
// memory-layout pass (pass::MemoryLayout calls set_temporary_pool_size with
// the allocator's high-water mark).
size_t Function::get_temporary_pool_size()
{
    return m_temporary_pool_size;
}

// Record the temporary pool size; invoked by the memory-layout pass once
// tensor pool offsets have been assigned.
void Function::set_temporary_pool_size(size_t size)
{
    m_temporary_pool_size = size;
}
std::ostream& ngraph::operator<<(std::ostream& out, const Function& f)
{
out << "Function(" << f.get_name() << ")";
......
......@@ -56,6 +56,9 @@ namespace ngraph
void clear_ordered_ops_valid() { m_ordered_ops_valid = false; }
friend std::ostream& operator<<(std::ostream&, const Function&);
size_t get_instance_id() { return m_instance_id; }
size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t);
protected:
std::shared_ptr<Node> m_result;
std::vector<std::shared_ptr<ngraph::op::Parameter>> m_parameters;
......@@ -64,6 +67,7 @@ namespace ngraph
bool m_ordered_ops_valid;
std::list<std::shared_ptr<Node>> m_ordered_ops;
std::list<std::shared_ptr<Node>> m_ops;
size_t m_temporary_pool_size;
private:
Function(const Function&) = delete;
......
This diff is collapsed.
......@@ -18,6 +18,10 @@
#pragma once
#ifdef IN_NGRAPH_LIBRARY
#error("ngraph.hpp is for external use only")
#endif
/// @namespace ngraph
/// @brief The Intel Nervana Graph C++ API.
......
......@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/node.hpp"
#include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/ops/parameter.hpp"
using namespace std;
using namespace ngraph;
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/multiply.hpp"
/// Backpropagation for z = base ^ exponent:
///   dz/dbase     = exponent * z / base
///   dz/dexponent = z * log(base)
/// Both contributions are scaled by the incoming delta and accumulated
/// into the adjoints of the two inputs.
void ngraph::op::Power::generate_adjoints(autodiff::Adjoints& adjoints,
                                          const std::shared_ptr<Node>& delta)
{
    auto base = m_arguments[0];
    auto exponent = m_arguments[1];

    // z is this node's own output; reuse it rather than recomputing the power.
    auto z = shared_from_this();

    adjoints.add_delta(base, delta * exponent * z / base);
    adjoints.add_delta(exponent, delta * z * std::make_shared<op::Log>(base));
}
......@@ -37,9 +37,9 @@ namespace ngraph
///
/// ## Implementation Status
///
/// | Backend | Status |
/// | ------- | ---------------- |
/// | NGVM | Not implemented. |
/// | Backend | Status |
/// | ------- | ------------------ |
/// | NGVM | Fully implemented. |
class Power : public BinaryElementwiseArithmetic
{
public:
......@@ -59,6 +59,10 @@ namespace ngraph
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<Power>(new_args.at(0), new_args.at(1));
}
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
};
}
}
......@@ -67,6 +67,7 @@ void ngraph::pass::Manager::run_passes(shared_ptr<Function> func)
vector<shared_ptr<Function>> fs;
for (shared_ptr<Function> f : get_state().get_functions())
{
f->get_result()->set_is_output();
fs.push_back(f);
}
......
......@@ -27,13 +27,3 @@ const vector<shared_ptr<Function>>& ngraph::pass::ManagerState::get_functions()
{
return m_function_list;
}
size_t ngraph::pass::ManagerState::get_temporary_pool_size()
{
return m_temporary_pool_size;
}
void ngraph::pass::ManagerState::set_temporary_pool_size(size_t size)
{
m_temporary_pool_size = size;
}
......@@ -39,10 +39,6 @@ public:
m_function_list.insert(m_function_list.begin(), collection.begin(), collection.end());
}
size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t);
private:
size_t m_temporary_pool_size = 0;
std::vector<std::shared_ptr<Function>> m_function_list;
};
......@@ -31,10 +31,10 @@ pass::MemoryLayout::MemoryLayout(size_t alignment)
{
}
bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& node_list)
bool pass::MemoryLayout::run_on_function(std::shared_ptr<ngraph::Function> function)
{
MemoryManager mm(m_alignment);
for (shared_ptr<Node> node : node_list)
for (shared_ptr<Node> node : function->get_ordered_ops())
{
for (Tensor* tensor : node->liveness_new_list)
{
......@@ -46,7 +46,7 @@ bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& nod
mm.free(tensor->get_pool_offset());
}
}
get_state().set_temporary_pool_size(mm.max_allocated());
function->set_temporary_pool_size(mm.max_allocated());
return false;
}
......
......@@ -30,11 +30,11 @@ namespace ngraph
}
}
class ngraph::pass::MemoryLayout : public CallGraphPass
class ngraph::pass::MemoryLayout : public FunctionPass
{
public:
MemoryLayout(size_t alignment = 1);
virtual bool run_on_call_graph(std::list<std::shared_ptr<Node>>&) override;
bool run_on_function(std::shared_ptr<ngraph::Function>) override;
private:
size_t m_alignment;
......
......@@ -18,7 +18,6 @@
#include "ngraph/runtime/parameterized_tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/runtime/tuple.hpp"
#include "ngraph/types/element_type.hpp"
using namespace ngraph::runtime;
......
......@@ -18,7 +18,7 @@
#include "ngraph/common.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/ndarray.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
......@@ -35,9 +35,6 @@ namespace ngraph
class Tuple;
class Value;
template <typename ET>
class ParameterizedTensorView;
/// @brief Interface to a generic backend.
///
/// Backends are responsible for function execution and value allocation.
......@@ -56,12 +53,11 @@ namespace ngraph
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape);
template <typename ET>
std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>
make_parameterized_tensor_view(const Shape& shape)
template <typename T>
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const Shape& shape)
{
return std::dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<ET>>(
make_primary_tensor_view(ET::element_type(), shape));
return make_primary_tensor_view(element::from<T>(), shape);
}
/// @brief Construct a tuple handle from a sequence of values.
......
......@@ -36,8 +36,8 @@ namespace ngraph
///
/// Tuples will be expanded into their tensor views to build the call frame.
virtual void
operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs) = 0;
call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs) = 0;
/// @brief Invoke the function with tuples pre-expanded to their underlying tensor views.
virtual void tensor_call(const TensorViewPtrs& inputs,
......
......@@ -20,8 +20,10 @@
using namespace std;
using namespace ngraph::runtime::cpu;
CallFrame::CallFrame(EntryPoint compiled_function)
: m_compiled_function(compiled_function)
CallFrame::CallFrame(std::shared_ptr<ExternalFunction> external_function,
EntryPoint compiled_function)
: m_external_function(external_function)
, m_compiled_function(compiled_function)
{
}
......@@ -45,11 +47,11 @@ void CallFrame::tensor_call(
}
// Invoke compiled computation
m_compiled_function(inputs, outputs);
m_compiled_function(inputs.data(), outputs.data());
}
void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
void CallFrame::call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
{
// TODO: Check types of args and result
vector<shared_ptr<ngraph::runtime::TensorView>> inputs;
......
......@@ -31,9 +31,9 @@ namespace ngraph
namespace cpu
{
class CallFrame;
class ExternalFunction;
using EntryPoint_t = void(const std::vector<void*>& inputs,
const std::vector<void*>& outputs);
using EntryPoint_t = void(void** inputs, void** outputs);
using EntryPoint = std::function<EntryPoint_t>;
......@@ -41,14 +41,14 @@ namespace ngraph
class CallFrame : public ngraph::runtime::CallFrame
{
public:
CallFrame(EntryPoint compiled_function);
CallFrame(std::shared_ptr<ExternalFunction> external_function,
EntryPoint compiled_function);
/// @brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void
operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
void call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying
/// tensor views.
......@@ -56,6 +56,7 @@ namespace ngraph
const std::vector<std::shared_ptr<TensorView>>& outputs);
protected:
std::shared_ptr<ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
};
}
......
This diff is collapsed.
......@@ -24,7 +24,6 @@
#define EMITTER_DECL(E) \
E(const ngraph::Node* n, \
ExternalFunction* ef, \
const std::vector<TensorViewInfo>& inputs, \
const std::vector<TensorViewInfo>& outputs)
......@@ -94,6 +93,7 @@ namespace ngraph
void EMITTER_DECL(EmitAsin);
void EMITTER_DECL(EmitAcos);
void EMITTER_DECL(EmitAtan);
void EMITTER_DECL(EmitPower);
private:
void generate_call(const std::vector<TensorViewInfo>& inputs,
......
......@@ -56,6 +56,7 @@
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp"
......@@ -117,6 +118,7 @@ static const OpMap dispatcher{
{TI(ngraph::op::Minimum), &Emitter::EmitMinimum},
{TI(ngraph::op::Negative), &Emitter::EmitNegative},
{TI(ngraph::op::NotEqual), &Emitter::EmitNotEqual},
{TI(ngraph::op::Power), &Emitter::EmitPower},
{TI(ngraph::op::Select), &Emitter::EmitSelect},
{TI(ngraph::op::Subtract), &Emitter::EmitSubtract},
{TI(ngraph::op::ParameterizedConstant<ngraph::element::Bool>),
......@@ -170,13 +172,16 @@ void ExternalFunction::compile()
return;
}
string function_name = m_function->get_name();
string dump_filename = file_util::path_join(s_output_dir, function_name + "_ops.txt");
pass::Manager pass_manager;
pass_manager.register_pass<pass::TopologicalSort>();
// For now, just make everyone row-major.
pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>(64);
pass_manager.register_pass<pass::DumpSorted>("sorted_ops.txt");
pass_manager.register_pass<pass::DumpSorted>(dump_filename);
pass_manager.run_passes(m_function);
// Now we build the TU
......@@ -186,7 +191,6 @@ void ExternalFunction::compile()
TU +=
R"(// Generated by the NGraph CPU backend
#include <cmath>
#include <vector>
#include <Eigen/Dense>
......@@ -199,20 +203,24 @@ using namespace ngraph::runtime::cpu::eigen;
)";
string pch_header_source = TU.get_code();
// The "dso_handle" symbol is required by __cxa_atexit()
// which is enabled because the JIT uses it as the default mechanism
// to register cleanup handlers. We use it, and not atexit(), because
// atexit() happens too late, when the JIT is no longer alive
TU << "void *__dso_handle = 0;\n\n";
TU << "// Declare all functions\n";
for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{
TU << "extern \"C\" void " << f->get_name() << "(\n";
TU << " const std::vector<void*>& inputs,\n";
TU << " const std::vector<void*>& outputs);\n";
TU << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs);\n";
}
TU << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
TU << "extern \"C\" void " << current_function->get_name() << "(\n";
TU << " const std::vector<void*>& inputs,\n";
TU << " const std::vector<void*>& outputs)\n";
TU << "extern \"C\" void " << current_function->get_name();
TU << "(void** inputs, void** outputs)\n";
TU << "{\n";
TU.indent++;
......@@ -227,7 +235,7 @@ using namespace ngraph::runtime::cpu::eigen;
}
if (temporaries_used)
{
size_t temp_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temp_pool_size = current_function->get_temporary_pool_size();
TU << "// Allocate the memory pool\n";
TU << "ngraph::runtime::AlignedBuffer memory_handler(" << temp_pool_size << ", "
<< ngraph::runtime::cpu::alignment << ");\n";
......@@ -297,7 +305,7 @@ using namespace ngraph::runtime::cpu::eigen;
auto tv = output.get_tensor_view();
out.push_back({0, tv});
}
handler->second(&emitter, node.get(), this, in, out);
handler->second(&emitter, node.get(), in, out);
}
TU.indent--;
......@@ -308,7 +316,6 @@ using namespace ngraph::runtime::cpu::eigen;
// TODO: Cleanup and make this a utility function
string function_name = m_function->get_name();
file_util::make_directory(s_output_dir);
string filename = file_util::path_join(s_output_dir, function_name + "_codegen.cpp");
ofstream out(filename);
......@@ -316,19 +323,20 @@ using namespace ngraph::runtime::cpu::eigen;
out << code;
out.close();
codegen::Compiler compiler;
codegen::ExecutionEngine execution_engine;
compiler.reset(new codegen::Compiler());
execution_engine.reset(new codegen::ExecutionEngine());
compiler->set_precompiled_header_source(pch_header_source);
compiler.set_precompiled_header_source(pch_header_source);
auto llvm_module = compiler->compile(code);
auto llvm_module = compiler.compile(code);
if (llvm_module == nullptr)
{
throw runtime_error("function failed to compile");
}
execution_engine.add_module(llvm_module);
execution_engine.finalize();
m_compiled_function = execution_engine.find_function<EntryPoint_t>(function_name);
execution_engine->add_module(llvm_module);
execution_engine->finalize();
m_compiled_function = execution_engine->find_function<EntryPoint_t>(function_name);
assert(m_compiled_function);
m_is_compiled = true;
......@@ -345,5 +353,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
compile();
}
return make_shared<ngraph::runtime::cpu::CallFrame>(m_compiled_function);
return make_shared<ngraph::runtime::cpu::CallFrame>(shared_from_this(), m_compiled_function);
}
......@@ -21,6 +21,7 @@
#include <unordered_map>
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
......@@ -36,18 +37,15 @@ namespace ngraph
class Emitter;
class CallFrame;
using FunctionMap =
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<ExternalFunction>>;
using OpFunction = std::function<void(Emitter*,
const ngraph::Node*,
ExternalFunction*,
const std::vector<TensorViewInfo>& inputs,
const std::vector<TensorViewInfo>& outputs)>;
using OpMap = std::unordered_map<std::type_index, OpFunction>;
class ExternalFunction : public ngraph::runtime::ExternalFunction
class ExternalFunction : public ngraph::runtime::ExternalFunction,
public std::enable_shared_from_this<ExternalFunction>
{
public:
ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
......@@ -58,6 +56,10 @@ namespace ngraph
void compile();
EntryPoint m_compiled_function;
private:
std::unique_ptr<codegen::Compiler> compiler;
std::unique_ptr<codegen::ExecutionEngine> execution_engine;
};
}
}
......
......@@ -63,8 +63,8 @@ void CallFrame::tensor_call(
fill_n(m_tensor_views.begin(), m_n_inputs + m_n_outputs, nullptr);
}
void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
void CallFrame::call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
{
// TODO: Check types of args and result
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> inputs;
......
......@@ -19,6 +19,7 @@
#include "ngraph/function.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/parameterized_tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp"
namespace ngraph
......@@ -46,9 +47,8 @@ namespace ngraph
/// @brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void
operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
void call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying tensor views.
void tensor_call(const TensorViewPtrs& inputs, const TensorViewPtrs& outputs);
......
......@@ -58,7 +58,7 @@ namespace ngraph
{
outputs.push_back(call_frame.get_tensor_view(out.get_index()));
}
(*cf)(inputs, outputs);
cf->call(inputs, outputs);
}
protected:
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/runtime/ngvm/call_frame.hpp"
#include "ngraph/runtime/ngvm/eigen/utils.hpp"
#include "ngraph/runtime/ngvm/instruction.hpp"
#include "ngraph/runtime/tensor_view.hpp"
namespace ngraph
{
namespace runtime
{
namespace ngvm
{
namespace eigen
{
/// @brief NGVM instruction computing the coefficient-wise power
///        out[i] = arg0[i] ^ arg1[i] via Eigen's Array::pow.
///
/// All three tensors are accessed through EigenArray1d, i.e. presumably as
/// flattened 1-d views — TODO confirm the views require matching element
/// counts for arg0, arg1 and out.
template <typename ET>
class PowerInstruction : public Instruction
{
public:
    /// @param arg0 Tensor view holding the bases.
    /// @param arg1 Tensor view holding the exponents.
    /// @param out  Tensor view receiving the result.
    PowerInstruction(const TensorViewInfo& arg0,
                     const TensorViewInfo& arg1,
                     const TensorViewInfo& out)
        : m_arg0(arg0)
        , m_arg1(arg1)
        , m_out(out)
    {
    }

    /// Evaluate against the tensors resolved from the given call frame.
    virtual void execute(CallFrame& call_frame) const override
    {
        // Eigen coefficient-wise pow: each output element is the
        // corresponding base raised to the corresponding exponent.
        EigenArray1d<ET>(call_frame, m_out) =
            EigenArray1d<ET>(call_frame, m_arg0)
                .pow(EigenArray1d<ET>(call_frame, m_arg1));
    }

protected:
    TensorViewInfo m_arg0; // bases
    TensorViewInfo m_arg1; // exponents
    TensorViewInfo m_out;  // result
};
}
}
}
}
......@@ -55,7 +55,7 @@ namespace ngraph
auto ty = ngraph::runtime::make_tensor<ET>(Shape{}, {y});
auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
(*cf)({tx, ty}, {tr});
cf->call({tx, ty}, {tr});
return tr->get_vector()[0];
};
EigenVector<ET>(call_frame, m_out) =
......
......@@ -55,7 +55,7 @@ namespace ngraph
auto ty = ngraph::runtime::make_tensor<ET>(Shape{}, {y});
auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
(*cf)({tx, ty}, {tr});
cf->call({tx, ty}, {tr});
return tr->get_vector()[0];
};
EigenVector<ET>(call_frame, m_out) =
......
......@@ -55,7 +55,7 @@ namespace ngraph
auto ty = ngraph::runtime::make_tensor<ET>(Shape{}, {y});
auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
(*cf)({tx, ty}, {tr});
cf->call({tx, ty}, {tr});
return tr->get_vector()[0];
};
EigenArray1d<ET>(call_frame, m_out) =
......
......@@ -50,6 +50,7 @@
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp"
......@@ -98,6 +99,7 @@
#include "ngraph/runtime/ngvm/eigen/multiply.hpp"
#include "ngraph/runtime/ngvm/eigen/negate.hpp"
#include "ngraph/runtime/ngvm/eigen/not_equal.hpp"
#include "ngraph/runtime/ngvm/eigen/power.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_matrix_columns.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_matrix_rows.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_to_scalar.hpp"
......@@ -380,6 +382,7 @@ ExternalFunction::OpMap& ExternalFunction::get_op_map()
REGISTER_NUMERIC_BINOP(op::Maximum, eigen::MaximumInstruction);
REGISTER_NUMERIC_BINOP(op::Minimum, eigen::MinimumInstruction);
REGISTER_NUMERIC_BINOP(op::Multiply, eigen::MultiplyInstruction);
REGISTER_NUMERIC_BINOP(op::Power, eigen::PowerInstruction);
REGISTER_NUMERIC_BINOP(op::Subtract, eigen::SubtractInstruction);
REGISTER_TO_OP_MAP(op::Constant)
......
......@@ -25,9 +25,11 @@ const element::Type element::boolean(8, false, false, "bool");
const element::Type element::f32(32, true, true, "float");
const element::Type element::f64(64, true, true, "double");
const element::Type element::i8(8, false, true, "int8_t");
const element::Type element::i16(16, false, true, "int16_t");
const element::Type element::i32(32, false, true, "int32_t");
const element::Type element::i64(64, false, true, "int64_t");
const element::Type element::u8(8, false, false, "uint8_t");
const element::Type element::u16(16, false, false, "uint16_t");
const element::Type element::u32(32, false, false, "uint32_t");
const element::Type element::u64(64, false, false, "uint64_t");
......
......@@ -29,14 +29,22 @@
namespace ngraph
{
namespace runtime
{
template <typename ET>
class ParameterizedTensorView;
}
namespace element
{
class Type;
extern const Type boolean;
extern const Type f32;
extern const Type f64;
extern const Type i8;
extern const Type i16;
extern const Type i32;
extern const Type i64;
extern const Type u8;
extern const Type u16;
extern const Type u32;
extern const Type u64;
class Type
{
Type(const Type&) = delete;
......@@ -66,15 +74,58 @@ namespace ngraph
const std::string m_cname;
};
extern const Type boolean;
extern const Type f32;
extern const Type f64;
extern const Type i8;
extern const Type i32;
extern const Type i64;
extern const Type u8;
extern const Type u32;
extern const Type u64;
/// @brief Map a C++ scalar type T to the corresponding element::Type constant.
///
/// Dispatch is done at runtime with typeid comparisons rather than at compile
/// time; note that both char and bool map to element::boolean.
///
/// @throws std::invalid_argument if T is none of the supported scalar types.
template <typename T>
const Type& from()
{
    if (typeid(T) == typeid(char) || typeid(T) == typeid(bool))
    {
        return boolean;
    }
    else if (typeid(T) == typeid(float))
    {
        return f32;
    }
    else if (typeid(T) == typeid(double))
    {
        return f64;
    }
    else if (typeid(T) == typeid(int8_t))
    {
        return i8;
    }
    else if (typeid(T) == typeid(int16_t))
    {
        return i16;
    }
    else if (typeid(T) == typeid(int32_t))
    {
        return i32;
    }
    else if (typeid(T) == typeid(int64_t))
    {
        return i64;
    }
    else if (typeid(T) == typeid(uint8_t))
    {
        return u8;
    }
    else if (typeid(T) == typeid(uint16_t))
    {
        return u16;
    }
    else if (typeid(T) == typeid(uint32_t))
    {
        return u32;
    }
    else if (typeid(T) == typeid(uint64_t))
    {
        return u64;
    }
    else
    {
        // No element::Type exists for T (e.g. long double, pointers, classes).
        throw std::invalid_argument("Unknown type");
    }
}
std::ostream& operator<<(std::ostream& out, const ngraph::element::Type& obj);
......
......@@ -23,9 +23,10 @@ include_directories(
set (SRC
autodiff.cpp
copy.cpp
build_graph.cpp
copy.cpp
eigen.cpp
element_type.cpp
file_util.cpp
input_output_assign.cpp
main.cpp
......@@ -33,6 +34,7 @@ set (SRC
pass_liveness.cpp
pass_manager.cpp
pass_memory_layout.cpp
serialize.cpp
shape.cpp
tensor.cpp
topological_sort.cpp
......
This diff is collapsed.
This diff is collapsed.
......@@ -39,7 +39,6 @@ bool check_unary()
auto node = make_shared<OP>(arg0);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<OP>(new_node);
return (nullptr != new_node) && (new_args == new_node->get_arguments());
}
......@@ -56,7 +55,6 @@ bool check_binary()
auto node = make_shared<OP>(arg0, arg1);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<OP>(new_node);
return (nullptr != new_node) && (new_args == new_node->get_arguments());
}
......@@ -99,6 +97,7 @@ TEST(copy, broadcast)
auto node = make_shared<op::Broadcast>(arg0, shape, axes);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Broadcast>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -123,6 +122,7 @@ TEST(copy, concat)
auto node = make_shared<op::Concat>(Nodes{arg0, arg1}, axis);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Concat>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -140,9 +140,11 @@ TEST(copy, parameterized_constant)
Shape shape{2, 2};
auto cptv = dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<element::Float32>>(c);
ASSERT_NE(cptv, nullptr);
auto node = make_shared<op::ParameterizedConstant<element::Float32>>(shape, cptv);
auto new_node = node->copy_with_new_args(Nodes{});
auto node_cast = dynamic_pointer_cast<op::ParameterizedConstant<element::Float32>>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(Nodes{} == new_node->get_arguments());
ASSERT_TRUE(node_cast->get_value() == c);
......@@ -157,6 +159,7 @@ TEST(copy, constant)
auto node = make_shared<op::Constant>(et, shape, c);
auto new_node = node->copy_with_new_args(Nodes{});
auto node_cast = dynamic_pointer_cast<op::Constant>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(Nodes{} == new_node->get_arguments());
ASSERT_TRUE(node_cast->get_value_strings() == c);
......@@ -175,6 +178,7 @@ TEST(copy, convert)
auto node = make_shared<op::Convert>(arg0, et);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Convert>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -236,6 +240,7 @@ TEST(copy, FunctionCall)
make_shared<op::Parameter>(element::Float32::element_type(), shape)};
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::FunctionCall>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -255,6 +260,7 @@ TEST(copy, GetTupleElement)
auto node = make_shared<op::GetTupleElement>(arg0, n);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::GetTupleElement>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -317,6 +323,7 @@ TEST(copy, parameter)
auto node = make_shared<op::Parameter>(element::Float32::element_type(), shape);
auto new_node = node->copy_with_new_args({});
auto node_cast = dynamic_pointer_cast<op::Parameter>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_node->get_arguments().size() == 0);
......@@ -347,6 +354,7 @@ TEST(copy, reduce)
auto node = make_shared<op::Reduce>(arg0, arg_init, f, axes);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Reduce>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -372,6 +380,7 @@ TEST(copy, reshape)
auto node = make_shared<op::Reshape>(arg0, axes, shape_out);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Reshape>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -393,6 +402,7 @@ TEST(copy, select)
auto node = make_shared<op::Select>(arg0, arg1, arg2);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Select>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -427,6 +437,7 @@ TEST(copy, slice)
auto node = make_shared<op::Slice>(arg0, lower, upper, step);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Slice>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -451,6 +462,7 @@ TEST(copy, sum)
auto node = make_shared<op::Sum>(arg0, axes);
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Sum>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......@@ -479,6 +491,7 @@ TEST(copy, tuple)
auto node = make_shared<op::Tuple>(Nodes{arg0, arg1});
auto new_node = node->copy_with_new_args(new_args);
auto node_cast = dynamic_pointer_cast<op::Tuple>(new_node);
ASSERT_NE(node_cast, nullptr);
ASSERT_TRUE(nullptr != new_node);
ASSERT_TRUE(new_args == new_node->get_arguments());
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "gtest/gtest.h"
#include "ngraph/types/element_type.hpp"
using namespace ngraph;
// Checks that element::from<T>() maps each fundamental C++ type to the
// corresponding nGraph element-type descriptor.
TEST(element_type, from)
{
    // Boolean-like: both char and bool resolve to element::boolean.
    EXPECT_EQ(element::boolean, element::from<char>());
    EXPECT_EQ(element::boolean, element::from<bool>());

    // Floating-point widths.
    EXPECT_EQ(element::f32, element::from<float>());
    EXPECT_EQ(element::f64, element::from<double>());

    // Signed integer widths.
    EXPECT_EQ(element::i8, element::from<int8_t>());
    EXPECT_EQ(element::i16, element::from<int16_t>());
    EXPECT_EQ(element::i32, element::from<int32_t>());
    EXPECT_EQ(element::i64, element::from<int64_t>());

    // Unsigned integer widths.
    EXPECT_EQ(element::u8, element::from<uint8_t>());
    EXPECT_EQ(element::u16, element::from<uint16_t>());
    EXPECT_EQ(element::u32, element::from<uint32_t>());
    EXPECT_EQ(element::u64, element::from<uint64_t>());
}
......@@ -215,7 +215,7 @@ TEST(memory_layout, basic)
auto graph = make_test_graph();
pass_manager.run_passes(graph);
auto sorted = graph->get_ordered_ops();
size_t temporary_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temporary_pool_size = graph->get_temporary_pool_size();
EXPECT_EQ(12, temporary_pool_size);
}
......@@ -235,6 +235,6 @@ TEST(memory_layout, constant)
pass_manager.run_passes(f);
auto sorted = f->get_ordered_ops();
size_t temporary_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temporary_pool_size = f->get_temporary_pool_size();
EXPECT_EQ(0, temporary_pool_size);
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "gtest/gtest.h"
......@@ -185,20 +185,20 @@ TEST(util, all_close)
auto backend = manager->allocate_backend();
// Create some tensors for input/output
auto a = backend->make_parameterized_tensor_view<element::Float32>(Shape{2, 3});
auto b = backend->make_parameterized_tensor_view<element::Float32>(Shape{2, 3});
auto a = backend->make_primary_tensor_view(element::Float32::element_type(), Shape{2, 3});
auto b = backend->make_primary_tensor_view(element::Float32::element_type(), Shape{2, 3});
copy_data(a, runtime::NDArray<float, 2>({{1, 2, 3}, {3, 4, 5}}).get_vector());
copy_data(b, runtime::NDArray<float, 2>({{1, 2, 3}, {3, 4, 5}}).get_vector());
EXPECT_TRUE(ngraph::test::all_close(a, b));
EXPECT_TRUE(ngraph::test::all_close<float>(a, b));
auto c = backend->make_parameterized_tensor_view<element::Float32>(Shape{2, 3});
auto c = backend->make_primary_tensor_view(element::Float32::element_type(), Shape{2, 3});
copy_data(c, runtime::NDArray<float, 2>({{1.1f, 2, 3}, {3, 4, 5}}).get_vector());
EXPECT_FALSE(ngraph::test::all_close(c, a, 0, .05f));
EXPECT_TRUE(ngraph::test::all_close(c, a, 0, .11f));
EXPECT_FALSE(ngraph::test::all_close<float>(c, a, 0, .05f));
EXPECT_TRUE(ngraph::test::all_close<float>(c, a, 0, .11f));
EXPECT_FALSE(ngraph::test::all_close(c, a, .05f, 0));
EXPECT_TRUE(ngraph::test::all_close(c, a, .11f, 0));
EXPECT_FALSE(ngraph::test::all_close<float>(c, a, .05f, 0));
EXPECT_TRUE(ngraph::test::all_close<float>(c, a, .11f, 0));
}
......@@ -54,11 +54,11 @@ namespace ngraph
/// @param rtol Relative tolerance
/// @param atol Absolute tolerance
/// Returns true if shapes match and for all elements, |a_i-b_i| <= atol + rtol*|b_i|.
template <typename ET>
bool all_close(const std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>& a,
const std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>& b,
typename ET::type rtol = 1e-5f,
typename ET::type atol = 1e-8f)
template <typename T>
bool all_close(const std::shared_ptr<ngraph::runtime::TensorView>& a,
const std::shared_ptr<ngraph::runtime::TensorView>& b,
T rtol = 1e-5f,
T atol = 1e-8f)
{
// Check that the layouts are compatible
if (*a->get_tensor_view_layout() != *b->get_tensor_view_layout())
......@@ -69,7 +69,7 @@ namespace ngraph
if (a->get_shape() != b->get_shape())
return false;
return all_close(a->get_vector(), b->get_vector(), rtol, atol);
return all_close(a->get_vector<T>(), b->get_vector<T>(), rtol, atol);
}
/// @brief Same as numpy.allclose
......@@ -78,12 +78,11 @@ namespace ngraph
/// @param rtol Relative tolerance
/// @param atol Absolute tolerance
/// Returns true if shapes match and for all elements, |a_i-b_i| <= atol + rtol*|b_i|.
template <typename ET>
bool all_close(
const std::vector<std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>>& as,
const std::vector<std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>>& bs,
typename ET::type rtol,
typename ET::type atol)
template <typename T>
bool all_close(const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& as,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& bs,
T rtol,
T atol)
{
if (as.size() != bs.size())
{
......
......@@ -39,24 +39,24 @@ namespace ngraph
/// @returns f'(X_i..., c) where f'(x_i, ..., c)_j is backprop for X_j
std::shared_ptr<Function> backprop_function(const std::shared_ptr<Function>& f);
template <typename ET>
std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>> backprop_derivative(
const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
const std::shared_ptr<Function>& f,
const std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>>& args)
template <typename T>
std::vector<std::shared_ptr<runtime::TensorView>>
backprop_derivative(const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
const std::shared_ptr<Function>& f,
const std::vector<std::shared_ptr<runtime::TensorView>>& args)
{
auto y = f->get_result();
Shape y_shape =
std::dynamic_pointer_cast<const TensorViewType>(y->get_value_type())->get_shape();
auto c_param = std::make_shared<op::Parameter>(ET::element_type(), y_shape);
auto c_arg = backend->make_parameterized_tensor_view<ET>(y_shape);
auto c_param = std::make_shared<op::Parameter>(element::from<T>(), y_shape);
auto c_arg = backend->make_primary_tensor_view<T>(y_shape);
auto params = f->get_parameters();
std::vector<std::shared_ptr<Node>> deriv_nodes;
std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>> bprops;
std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>> results;
std::vector<std::shared_ptr<runtime::TensorView>> bprops;
std::vector<std::shared_ptr<runtime::TensorView>> results;
for (auto param : params)
{
Shape s = y_shape;
......@@ -64,8 +64,8 @@ namespace ngraph
std::dynamic_pointer_cast<const TensorViewType>(param->get_value_type())
->get_shape();
s.insert(s.end(), param_shape.begin(), param_shape.end());
results.push_back(backend->make_parameterized_tensor_view<ET>(s));
bprops.push_back(backend->make_parameterized_tensor_view<ET>(param_shape));
results.push_back(backend->make_primary_tensor_view<T>(s));
bprops.push_back(backend->make_primary_tensor_view<T>(param_shape));
deriv_nodes.push_back(y->backprop_node(param, c_param));
}
......@@ -78,22 +78,22 @@ namespace ngraph
auto cf = backend->make_call_frame(external);
// We compute the derivatives chunk by chunk
std::vector<typename std::vector<typename ET::type>::iterator> result_pos;
std::vector<std::vector<typename ET::type>> result_vect;
std::vector<typename std::vector<T>::iterator> result_pos;
std::vector<std::vector<T>> result_vect;
for (auto result : results)
{
result_vect.push_back(result->get_vector()); // storage for results
result_vect.push_back(result->get_vector<T>()); // storage for results
result_pos.push_back(result_vect.back().begin());
}
ngraph::runtime::TensorViewPtrs args_tv;
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> args_tv;
args_tv.insert(args_tv.begin(), args.begin(), args.end());
args_tv.push_back(c_arg);
runtime::TensorViewPtrs bprops_tv;
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> bprops_tv;
bprops_tv.insert(bprops_tv.begin(), bprops.begin(), bprops.end());
auto c_vec = c_arg->get_vector();
auto c_vec = c_arg->template get_vector<T>();
for (size_t i = 0; i < c_vec.size(); i++)
{
c_vec[i] = 1;
......@@ -103,7 +103,7 @@ namespace ngraph
c_arg->write(c_vec);
for (size_t j = 0; j < results.size(); j++)
{
auto bprop_vec = bprops[j]->get_vector();
auto bprop_vec = bprops[j]->get_vector<T>();
result_pos[j] = std::copy(bprop_vec.begin(), bprop_vec.end(), result_pos[j]);
}
}
......
......@@ -33,13 +33,13 @@ namespace ngraph
/// @param args Values for the arguments (the independent variables)
/// @param delta increment for the variables
/// @returns vector of dy/dvar, where each dy/dvar's shape is concat(y.shape(), var.shape())
template <typename ET>
std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>> numeric_derivative(
const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
const std::shared_ptr<Function>& f,
const std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>>& args,
typename ET::type delta)
template <typename T>
std::vector<std::shared_ptr<runtime::TensorView>>
numeric_derivative(const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
const std::shared_ptr<Function>& f,
const std::vector<std::shared_ptr<runtime::TensorView>>& args,
T delta)
{
auto y = f->get_result();
......@@ -49,7 +49,7 @@ namespace ngraph
auto params = f->get_parameters();
// Results for each derivative, shape Y|X_i
std::vector<std::shared_ptr<runtime::ParameterizedTensorView<ET>>> results;
std::vector<std::shared_ptr<runtime::TensorView>> results;
for (auto param : params)
{
Shape s = y_shape;
......@@ -57,39 +57,36 @@ namespace ngraph
std::dynamic_pointer_cast<const TensorViewType>(param->get_value_type())
->get_shape();
s.insert(s.end(), param_shape.begin(), param_shape.end());
results.push_back(backend->make_parameterized_tensor_view<ET>(s));
results.push_back(backend->make_primary_tensor_view<T>(s));
}
auto external = manager->compile(f);
auto cf = backend->make_call_frame(external);
// ref_y is the function evaluated at the args
auto ref_y = backend->make_parameterized_tensor_view<ET>(y_shape);
auto ref_y = backend->make_primary_tensor_view<T>(y_shape);
ngraph::runtime::TensorViewPtrs args_tv;
args_tv.insert(args_tv.begin(), args.begin(), args.end());
cf->tensor_call(args_tv, runtime::TensorViewPtrs{ref_y});
auto& ref_vec = ref_y->get_vector();
cf->tensor_call(args, std::vector<std::shared_ptr<ngraph::runtime::TensorView>>{ref_y});
auto ref_vec = ref_y->template get_vector<T>();
// inc_y will hold f(x+dx) values
auto inc_y = backend->make_parameterized_tensor_view<ET>(y_shape);
auto& inc_vec = inc_y->get_vector();
auto inc_y = backend->make_primary_tensor_view<T>(y_shape);
// Assuming vars, y, and results are row-major
typename ET::type inv_delta = 1 / delta;
T inv_delta = 1 / delta;
for (size_t i = 0; i < args.size(); ++i)
{
auto arg = args[i];
auto res = results[i]->get_vector();
auto vec = arg->get_vector();
auto res = results[i]->get_vector<T>();
auto vec = arg->get_vector<T>();
for (size_t j = 0; j < vec.size(); j++)
{
auto old_val = vec[j];
vec[j] += delta;
arg->write(vec);
cf->tensor_call(args_tv, {inc_y});
cf->tensor_call(args, {inc_y});
auto inc_vec = inc_y->template get_vector<T>();
vec[j] = old_val;
arg->write(vec);
size_t res_k = j;
......
......@@ -25,13 +25,11 @@ namespace ngraph
{
/// @brief A predictable pseudo-random number generator
/// The seed is initialized so that we get repeatable pseudo-random numbers for tests
template <typename ET>
template <typename T>
class Uniform
{
public:
using type = typename ET::type;
Uniform(type min, type max, type seed = 0)
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
......@@ -40,11 +38,11 @@ namespace ngraph
/// @brief Randomly initialize a tensor
/// @param ptv The tensor to initialize
const std::shared_ptr<runtime::ParameterizedTensorView<ET>>
initialize(const std::shared_ptr<runtime::ParameterizedTensorView<ET>>& ptv)
const std::shared_ptr<runtime::TensorView>
initialize(const std::shared_ptr<runtime::TensorView>& ptv)
{
auto vec = ptv->get_vector();
for (auto& elt : vec)
std::vector<T> vec = ptv->get_vector<T>();
for (T& elt : vec)
{
elt = m_r();
}
......@@ -54,8 +52,8 @@ namespace ngraph
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<type> m_distribution;
std::function<type()> m_r;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment