Commit b0de2d3e authored by Robert Kimball

input tensors defined

parent 073adbfb
......@@ -112,6 +112,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_kernels.cpp
runtime/cpu/emitter.cpp
runtime/cpu/external_function.cpp
runtime/cpu/tensor_view.cpp
)
# LLVM binary builds are typically built without RTTI
# The built-in headers are in a version-specific directory
......
......@@ -59,7 +59,7 @@ public:
size_t size() const;
void set_pool_offset(size_t);
size_t get_pool_offset() const;
const element::Type& get_element_type() const { return m_element_type; }
static std::string make_tensor_name(const Node* node, size_t value_index);
protected:
......
......@@ -26,9 +26,14 @@ using namespace std;
using namespace ngraph;
using namespace ngraph::descriptor;
// Construct the memory-layout pass. `alignment` (in bytes) is stored and later
// handed to the MemoryManager that assigns pool offsets in run_on_call_graph.
pass::MemoryLayout::MemoryLayout(size_t alignment)
: m_alignment(alignment)
{
}
bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& node_list)
{
MemoryManager mm;
MemoryManager mm(m_alignment);
for (shared_ptr<Node> node : node_list)
{
for (Tensor* tensor : node->liveness_new_list)
......
......@@ -33,9 +33,11 @@ namespace ngraph
class ngraph::pass::MemoryLayout : public CallGraphPass
{
public:
MemoryLayout(size_t alignment = 1);
virtual bool run_on_call_graph(std::list<std::shared_ptr<Node>>&) override;
private:
size_t m_alignment;
};
class ngraph::pass::MemoryManager
......
......@@ -24,6 +24,7 @@ using namespace ngraph::runtime;
std::shared_ptr<TensorView>
Backend::make_primary_tensor_view(const ngraph::element::Type& element_type, const Shape& shape)
{
NGRAPH_INFO;
return element_type.make_primary_tensor_view(shape);
}
......
......@@ -17,6 +17,7 @@
#include <memory>
#include "ngraph/common.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/ndarray.hpp"
namespace ngraph
......@@ -59,6 +60,7 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>
make_parameterized_tensor_view(const Shape& shape)
{
NGRAPH_INFO;
return std::dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<ET>>(
make_primary_tensor_view(ET::element_type(), shape));
}
......@@ -67,6 +69,7 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>
make_parameterized_tensor_view(const NDArrayBase<typename ET::type>& ndarray)
{
NGRAPH_INFO;
auto result =
std::dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<ET>>(
make_primary_tensor_view(ET::element_type(), ndarray.get_shape()));
......
......@@ -15,53 +15,68 @@
#include <algorithm>
#include "call_frame.hpp"
#include "ngraph/runtime/cpu/tensor_view.hpp"
using namespace std;
using namespace ngraph::runtime::cpu;
CallFrame::CallFrame(EntryPoint compiled_function,
size_t n_outputs,
size_t n_inputs,
const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees)
: m_n_outputs(n_outputs)
, m_n_inputs(n_inputs)
, m_tensor_views(n_outputs + n_inputs + temps.size())
, m_compiled_function(compiled_function)
: m_compiled_function(compiled_function)
, m_callees(callees)
{
copy(temps.begin(), temps.end(), m_tensor_views.begin() + m_n_outputs + m_n_inputs);
}
void CallFrame::tensor_call(
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& outputs)
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
{
copy(outputs.begin(), outputs.end(), m_tensor_views.begin());
copy(inputs.begin(), inputs.end(), m_tensor_views.begin() + m_n_outputs);
m_inputs.clear();
m_outputs.clear();
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(input_tvs[i]);
m_inputs.push_back(tv->get_data_ptr());
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(output_tvs[i]);
m_outputs.push_back(tv->get_data_ptr());
}
// Invoke compiled computation
m_compiled_function(this, m_tensor_views, m_callees);
// Don't hold onto inputs/outputs
fill_n(m_tensor_views.begin(), m_n_outputs + m_n_inputs, nullptr);
m_compiled_function(this);
}
void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
{
// TODO: Check types of args and result
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> inputs;
for (auto argument : arguments)
vector<shared_ptr<ngraph::runtime::TensorView>> inputs;
for (shared_ptr<ngraph::runtime::Value> argument : arguments)
{
argument->collect_tensor_views(inputs, argument);
}
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> outputs;
for (auto result : results)
vector<shared_ptr<ngraph::runtime::TensorView>> outputs;
for (shared_ptr<ngraph::runtime::Value> result : results)
{
result->collect_tensor_views(outputs, result);
}
tensor_call(inputs, outputs);
}
/// Raw pointer to the data buffer of input tensor `index`, as captured by the
/// most recent tensor_call.
/// @throws std::out_of_range when `index` is not a valid input slot.
void* CallFrame::get_input_data(size_t index)
{
    return m_inputs.at(index);
}
/// Raw pointer to the data buffer of output tensor `index`, as captured by the
/// most recent tensor_call.
/// @throws std::out_of_range when `index` is not a valid output slot.
void* CallFrame::get_output_data(size_t index)
{
    return m_outputs.at(index);
}
......@@ -32,18 +32,15 @@ namespace ngraph
{
class CallFrame;
using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>;
using EntryPoint_t = void(ngraph::runtime::cpu::CallFrame* call_frame);
using EntryPoint = std::function<EntryPoint_t>;
// Compile and execute graphs
class CallFrame : public ngraph::runtime::CallFrame
{
public:
CallFrame(EntryPoint compiled_function,
size_t n_outputs,
size_t n_inputs,
const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees);
/// @brief Invoke the function with values matching the signature of the function.
......@@ -53,30 +50,25 @@ namespace ngraph
operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying tensor views.
void tensor_call(const TensorViewPtrs& inputs, const TensorViewPtrs& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying
/// tensor views.
void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
const std::vector<std::shared_ptr<TensorView>>& outputs);
void set_return() { m_return = true; }
std::shared_ptr<TensorView> get_tensor_view(size_t i) { return m_tensor_views[i]; }
template <typename ET>
ParameterizedTensorView<ET>* get_parameterized_tensor_view(size_t i)
{
return m_tensor_views[i]->get_parameterized_tensor_view<ET>();
}
// const std::vector<std::shared_ptr<ngraph::runtime::Value>>& get_inputs();
// const std::vector<std::shared_ptr<ngraph::runtime::Value>>& get_outputs();
template <typename ET>
typename ET::type* get_tensor_view_data(size_t i)
{
return &get_parameterized_tensor_view<ET>(i)->get_vector()[0];
}
void* get_input_data(size_t index);
void* get_output_data(size_t index);
protected:
size_t m_n_outputs;
size_t m_n_inputs;
TensorViewPtrs m_tensor_views;
bool m_return;
EntryPoint m_compiled_function;
std::vector<std::shared_ptr<CallFrame>> m_callees;
std::vector<void*> m_inputs;
std::vector<void*> m_outputs;
};
}
}
......
......@@ -13,12 +13,25 @@
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/tensor_view.hpp"
#include "ngraph/runtime/external_function.hpp"
using namespace ngraph::runtime::cpu;
using namespace ngraph;
using namespace std;
std::shared_ptr<ngraph::runtime::CallFrame>
CPUBackend::make_call_frame(const std::shared_ptr<ExternalFunction>& external_function)
extern "C" void
allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
std::shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPUBackend::make_call_frame(
const std::shared_ptr<ExternalFunction>& external_function)
{
return external_function->make_call_frame();
}
std::shared_ptr<ngraph::runtime::TensorView>
runtime::cpu::CPUBackend::make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape)
{
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
......@@ -22,11 +22,18 @@ namespace ngraph
{
namespace cpu
{
class CPUBackend : public Backend
static size_t alignment = 64;
class CPUBackend : public runtime::Backend
{
public:
virtual std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function);
std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function)
override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override;
};
}
}
......
......@@ -37,25 +37,24 @@ namespace ngraph
using DynamicStrides = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
using VectorStrides = Eigen::Stride<Eigen::Dynamic, 1>;
template <typename ET>
using DynamicArray =
Eigen::Array<typename ET::type, Eigen::Dynamic, Eigen::Dynamic>;
template <typename T>
using DynamicArray = Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>;
template <typename ET>
using EigenArrayBase = Eigen::Map<DynamicArray<ET>, 0, DynamicStrides>;
template <typename T>
using EigenArrayBase = Eigen::Map<DynamicArray<T>, 0, DynamicStrides>;
template <typename ET>
using DynamicMatrix = Eigen::
Matrix<typename ET::type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
template <typename T>
using DynamicMatrix =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
template <typename ET>
using EigenMatrixBase = Eigen::Map<DynamicMatrix<ET>, 0, DynamicStrides>;
template <typename T>
using EigenMatrixBase = Eigen::Map<DynamicMatrix<T>, 0, DynamicStrides>;
template <typename ET>
using DynamicVector = Eigen::Matrix<typename ET::type, Eigen::Dynamic, 1>;
template <typename T>
using DynamicVector = Eigen::Matrix<T, Eigen::Dynamic, 1>;
template <typename ET>
using EigenVectorBase = Eigen::Map<DynamicVector<ET>, 0, VectorStrides>;
template <typename T>
using EigenVectorBase = Eigen::Map<DynamicVector<T>, 0, VectorStrides>;
namespace fmt
{
......@@ -117,7 +116,7 @@ namespace ngraph
// ET element type
// FMT array format (fmt::V for vector, etc.)
// BASE select array/matrix
template <typename ET,
template <typename T,
typename FMT,
typename BASE,
typename STRIDES = DynamicStrides>
......@@ -126,26 +125,19 @@ namespace ngraph
using base = BASE;
public:
EigenWrapper(typename ET::type* t, const FMT& fmt)
EigenWrapper(T* t, const FMT& fmt)
: base(t, fmt.l0, fmt.l1, STRIDES(fmt.s0, fmt.s1))
{
}
EigenWrapper(
typename ET::type* t,
T* t,
const std::shared_ptr<ngraph::descriptor::layout::DenseTensorViewLayout>&
layout)
: base(t, layout->get_size(), 1, DynamicStrides(1, 1))
{
}
EigenWrapper(CallFrame* call_frame, const TensorViewInfo& tensor_view_info)
: EigenWrapper(
call_frame->get_tensor_view_data<ET>(tensor_view_info.get_index()),
FMT(tensor_view_info))
{
}
template <typename U>
EigenWrapper& operator=(const U& other)
{
......@@ -154,17 +146,17 @@ namespace ngraph
}
};
template <typename ET, typename FMT = fmt::V>
using EigenArray1d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
template <typename T, typename FMT = fmt::V>
using EigenArray1d = EigenWrapper<T, FMT, EigenArrayBase<T>>;
template <typename ET, typename FMT = fmt::M>
using EigenArray2d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
template <typename T, typename FMT = fmt::M>
using EigenArray2d = EigenWrapper<T, FMT, EigenArrayBase<T>>;
template <typename ET, typename FMT = fmt::M>
using EigenMatrix = EigenWrapper<ET, FMT, EigenMatrixBase<ET>>;
template <typename T, typename FMT = fmt::M>
using EigenMatrix = EigenWrapper<T, FMT, EigenMatrixBase<T>>;
template <typename ET, typename FMT = fmt::V>
using EigenVector = EigenWrapper<ET, FMT, EigenVectorBase<ET>, VectorStrides>;
template <typename T, typename FMT = fmt::V>
using EigenVector = EigenWrapper<T, FMT, EigenVectorBase<T>, VectorStrides>;
}
}
}
......
......@@ -34,8 +34,10 @@
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::cpu;
using ngraph::descriptor::layout::DenseTensorViewLayout;
......@@ -54,34 +56,18 @@ static unordered_map<type_index, string> element_type_names = {
#define EIGEN_VECTOR_FORMAT(x) "fmt::V{" + to_string(x) + "}"
string eigen_vector_format(const runtime::TensorViewInfo& info)
{
stringstream ss;
ss << "fmt::V{" << info.get_layout<DenseTensorViewLayout>()->get_size() << "}";
return ss.str();
}
static std::string EIGEN_MATRIX_FORMAT(const ngraph::Shape& shape, const ngraph::Strides& strides)
{
std::string I;
for (size_t i = 0; i < shape.size(); i++)
{
if (!i)
{
I += "fmt::M{{" + to_string(shape[i]);
}
else
{
I += ", " + to_string(shape[i]);
}
}
I += "}, ";
for (size_t i = 0; i < strides.size(); i++)
{
if (!i)
{
I += "{" + to_string(strides[i]);
}
else
{
I += ", " + to_string(strides[i]);
}
}
I += "}}";
return I;
stringstream ss;
ss << "fmt::M{{" << join(shape) << "}, {" << join(strides) << "}}";
return ss.str();
}
void Emitter::EMITTER_DECL(EmitNop)
......@@ -93,18 +79,21 @@ void Emitter::EMITTER_DECL(EmitAdd)
const element::Type& et =
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type();
TU += " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
" auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[1].get_index()) + ");\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") +\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ");\n"
" }\n";
string type = et.c_type_string();
TU.indent++;
TU << "{ // " << n->get_name() << "\n";
TU.indent++;
TU << "EigenArray1d<" << type << ">(" << outputs[0].get_tensor().get_name() << ", "
<< eigen_vector_format(outputs[0]) << ") =\n";
TU.indent++;
TU << "EigenArray1d<" << type << ">(" << inputs[0].get_tensor().get_name() << ", "
<< eigen_vector_format(inputs[0]) << ") +\n";
TU << "EigenArray1d<" << type << ">(" << inputs[1].get_tensor().get_name() << ", "
<< eigen_vector_format(inputs[1]) << ");\n";
TU.indent -= 2;
TU << "}\n";
TU.indent--;
}
void Emitter::EMITTER_DECL(EmitDot)
......@@ -248,16 +237,14 @@ void Emitter::EMITTER_DECL(EmitMultiply)
const element::Type& et =
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type();
string type = et.c_type_string();
TU += " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
" auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[1].get_index()) + ");\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
TU += " { // " + n->get_name() + "\n"
" EigenArray1d<" + type + ">(" + outputs[0].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
" EigenArray1d<" + type + ">(" + inputs[0].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") *\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
" EigenArray1d<" + type + ">(" + inputs[1].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ");\n"
" }\n";
}
......@@ -1058,6 +1045,7 @@ void Emitter::EMITTER_DECL(EmitReshape)
void Emitter::EMITTER_DECL(EmitFunctionCall)
{
NGRAPH_INFO;
auto function_call = static_cast<const op::FunctionCall*>(n);
auto function = function_call->get_function();
......@@ -1073,6 +1061,7 @@ void Emitter::EMITTER_DECL(EmitFunctionCall)
function_map.insert({function, external});
}
NGRAPH_INFO;
std::shared_ptr<CallFrame> cf =
std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
......@@ -1110,6 +1099,7 @@ void Emitter::EMITTER_DECL(EmitFunctionCall)
void Emitter::EMITTER_DECL(EmitReduce)
{
NGRAPH_INFO;
auto reduce = static_cast<const op::Reduce*>(n);
auto reduction_function = reduce->get_reduction_function();
......
......@@ -162,12 +162,20 @@ static const OpMap dispatcher{
{TI(ngraph::op::Atan), &Emitter::EmitAtan},
};
#undef TI
static unordered_map<type_index, string> element_type_names = {
{TI(ngraph::element::Bool), "Bool"},
{TI(ngraph::element::Float32), "Float32"},
{TI(ngraph::element::Int8), "Int8"},
{TI(ngraph::element::Int32), "Int32"},
{TI(ngraph::element::Int64), "Int64"},
{TI(ngraph::element::UInt8), "UInt8"},
{TI(ngraph::element::UInt32), "UInt32"},
{TI(ngraph::element::UInt64), "UInt64"}};
ExternalFunction::ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function)
, compiled_function(nullptr)
, m_compiled_function(nullptr)
{
}
......@@ -183,7 +191,7 @@ void ExternalFunction::compile(FunctionMap& function_map)
// For now, just make everyone row-major.
pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>();
pass_manager.register_pass<pass::MemoryLayout>(64);
pass_manager.run_passes(m_function);
// Determine tensor requirements for the call frame
......@@ -228,7 +236,10 @@ void ExternalFunction::compile(FunctionMap& function_map)
// Now we build the TU
Emitter emitter;
codegen::CodeWriter& TU = emitter.get_code_writer();
TU += R"(// Generated by the NGraph CPU backend
string function_name = m_function->get_name() + "_entrypoint";
TU +=
R"(// Generated by the NGraph CPU backend
#include <algorithm>
#include <cmath>
#include <memory>
......@@ -246,16 +257,20 @@ using namespace ngraph::element;
using namespace ngraph::runtime;
using namespace ngraph::runtime::cpu::eigen;
extern "C" void allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
extern "C" void allocate_aligned_buffer(
size_t size,
size_t alignment,
char** allocated,
char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
ngraph::runtime::TensorViewPtrs& tensor_views,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
{
)";
TU << "extern \"C\" void " << function_name << "(\n";
TU << " ngraph::runtime::cpu::CallFrame * call_frame)\n";
TU << "{\n";
TU.indent++;
TU << "// Allocate the memory pool\n";
size_t temp_pool_size = pass_manager.get_state().get_temporary_pool_size();
......@@ -265,19 +280,50 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
<< ", &allocated_buffer_pool, &aligned_buffer_pool);\n";
TU << "\n";
TU << "// Define tensors\n";
// for (shared_ptr<Node> node : m_function->get_ordered_ops())
// {
// NGRAPH_INFO << *node;
// for (descriptor::Tensor* tensor : node->liveness_new_list)
// {
// NGRAPH_INFO << *tensor;
// }
// }
TU << "// Define temporary tensors\n";
for (shared_ptr<Node> node : m_function->get_ordered_ops())
{
for (descriptor::Tensor* tensor : node->liveness_new_list)
{
TU << tensor->get_element_type() << "* " << tensor->get_name() << " = ("
<< tensor->get_element_type() << "*)(aligned_buffer_pool + "
<< tensor->get_pool_offset() << ");\n";
}
}
TU << "\n";
TU << "// Define inputs\n";
size_t arg_index = 0;
for (shared_ptr<op::Parameter> param : m_function->get_parameters())
{
for (const descriptor::Output& output : param->get_outputs())
{
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type();
string type = et.c_type_string();
TU << "" << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
<< "*>(call_frame->get_input_data(" << arg_index << "));\n";
arg_index++;
}
}
TU << "\n";
TU << "// Define outputs\n";
size_t output_index = 0;
for (const descriptor::Output& output : m_function->get_result()->get_outputs())
{
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type();
string type = et.c_type_string();
TU << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
<< "*>(call_frame->get_output_data(" << output_index << "));\n";
output_index++;
}
TU << "\n";
TU << "// Define tensor views\n";
TU << "\n";
TU.indent--;
for (shared_ptr<Node> node : m_function->get_ordered_ops())
......@@ -328,11 +374,8 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
assert(llvm_module);
estate.add_module(llvm_module);
estate.finalize();
compiled_function =
estate.find_function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>("__entrypoint");
assert(compiled_function);
m_compiled_function = estate.find_function<EntryPoint_t>(function_name);
assert(m_compiled_function);
m_is_compiled = true;
if (m_release_function)
......@@ -341,53 +384,6 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
}
}
// Suppress Clang's complaints about the ,##__VA_ARGS__ token-pasting hack, which is a GNU extension
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
#define DO_ON_ELEMENT_TYPE(et, err_msg, macro, ...) \
{ \
if (et == element::Bool::element_type()) \
{ \
macro(element::Bool, ##__VA_ARGS__); \
} \
else if (et == element::Float32::element_type()) \
{ \
macro(element::Float32, ##__VA_ARGS__); \
} \
else if (et == element::Int8::element_type()) \
{ \
macro(element::Int8, ##__VA_ARGS__); \
} \
else if (et == element::Int32::element_type()) \
{ \
macro(element::Int32, ##__VA_ARGS__); \
} \
else if (et == element::Int64::element_type()) \
{ \
macro(element::Int64, ##__VA_ARGS__); \
} \
else if (et == element::UInt8::element_type()) \
{ \
macro(element::UInt8, ##__VA_ARGS__); \
} \
else if (et == element::UInt32::element_type()) \
{ \
macro(element::UInt32, ##__VA_ARGS__); \
} \
else if (et == element::UInt64::element_type()) \
{ \
macro(element::UInt64, ##__VA_ARGS__); \
} \
else \
{ \
throw ngraph_error(err_msg); \
} \
}
// Turn off complaint suppression (see above)
#pragma clang diagnostic pop
shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
{
FunctionMap function_map;
......@@ -397,17 +393,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
compile(function_map);
}
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> temps;
for (auto tv : m_temp_views)
{
auto& et = tv->get_tensor_view_type()->get_element_type();
auto shape = tv->get_tensor_view_type()->get_shape();
#define M(T) temps.push_back(ngraph::runtime::make_tensor<T>(shape));
DO_ON_ELEMENT_TYPE(
et, "Internal error: tried to create temporary for unhandled element type", M);
#undef M
}
return make_shared<ngraph::runtime::cpu::CallFrame>(
compiled_function, m_n_outputs, m_n_inputs, temps, callees);
return make_shared<ngraph::runtime::cpu::CallFrame>(m_compiled_function, callees);
}
......@@ -22,6 +22,7 @@
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
......@@ -47,11 +48,6 @@ namespace ngraph
using OpMap = std::unordered_map<std::type_index, OpFunction>;
using EntryPoint = std::function<void(
ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>&)>;
class ExternalFunction : public ngraph::runtime::ExternalFunction
{
public:
......@@ -65,7 +61,7 @@ namespace ngraph
size_t m_n_inputs;
size_t m_n_outputs;
ngraph::descriptor::TensorViewPtrs m_temp_views;
EntryPoint compiled_function;
EntryPoint m_compiled_function;
std::vector<std::shared_ptr<CallFrame>> callees;
};
}
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <memory>
#include "cpu_backend.hpp"
#include "tensor_view.hpp"
using namespace ngraph;
using namespace std;
extern "C" void
allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
// Construct a CPU tensor view backed by a freshly allocated, aligned host
// buffer. The descriptor is given a dense layout, and the buffer is sized as
// element_count * element_size bytes.
// (Fix: removed leftover NGRAPH_INFO debug trace of the buffer size.)
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
                                           const Shape& shape)
    : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
          std::make_shared<ngraph::TensorViewType>(element_type, shape), "external", true, true))
{
    m_descriptor->set_tensor_view_layout(
        std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
    m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
    // allocate_aligned_buffer yields the raw allocation (m_allocated, released in
    // the destructor) and an aligned pointer used for element data (m_buffer).
    allocate_aligned_buffer(m_buffer_size, runtime::cpu::alignment, &m_allocated, &m_buffer);
}
// Release the backing allocation. Only m_allocated (the raw pointer returned by
// allocate_aligned_buffer) may be freed; m_buffer points into that allocation.
// (Fix: removed two leftover NGRAPH_INFO debug statements that logged on every
// tensor destruction.)
runtime::cpu::CPUTensorView::~CPUTensorView()
{
    free_aligned_buffer(m_allocated);
}
// Mutable pointer to the aligned element storage of this tensor.
char* runtime::cpu::CPUTensorView::get_data_ptr()
{
return m_buffer;
}
// Read-only pointer to the aligned element storage of this tensor.
const char* runtime::cpu::CPUTensorView::get_data_ptr() const
{
return m_buffer;
}
// Copy `n` bytes from `source` into tensor storage starting at byte offset
// `tensor_offset`.
// @throws std::out_of_range if [tensor_offset, tensor_offset + n) does not fit
//         inside the tensor's buffer.
// (Fix: the old check `tensor_offset + n > m_buffer_size` could wrap around for
// very large `n` and silently pass; removed leftover NGRAPH_INFO debug logging.)
void runtime::cpu::CPUTensorView::write(const void* source, size_t tensor_offset, size_t n)
{
    // Overflow-safe equivalent of `tensor_offset + n > m_buffer_size`.
    if (tensor_offset > m_buffer_size || n > m_buffer_size - tensor_offset)
    {
        throw out_of_range("write access past end of tensor");
    }
    char* target = get_data_ptr();
    memcpy(&target[tensor_offset], source, n);
}
// Copy `n` bytes out of tensor storage, starting at byte offset
// `tensor_offset`, into `target`.
// @throws std::out_of_range if [tensor_offset, tensor_offset + n) does not fit
//         inside the tensor's buffer.
// (Fix: the old check `tensor_offset + n > m_buffer_size` could wrap around for
// very large `n` and silently pass; removed leftover NGRAPH_INFO debug logging.)
void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_t n) const
{
    // Overflow-safe equivalent of `tensor_offset + n > m_buffer_size`.
    if (tensor_offset > m_buffer_size || n > m_buffer_size - tensor_offset)
    {
        throw out_of_range("read access past end of tensor");
    }
    const char* source = get_data_ptr();
    memcpy(target, &source[tensor_offset], n);
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include <memory>
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPUTensorView;
}
}
}
// Concrete TensorView for the CPU backend: owns a contiguous, aligned host
// buffer holding the tensor's elements (see tensor_view.cpp for allocation).
class ngraph::runtime::cpu::CPUTensorView : public ngraph::runtime::TensorView
{
public:
// Allocates an aligned buffer sized for `shape` elements of `element_type`.
CPUTensorView(const ngraph::element::Type& element_type, const Shape& shape);
// Frees the buffer allocated by the constructor.
virtual ~CPUTensorView();
// Raw access to the aligned element storage.
char* get_data_ptr();
const char* get_data_ptr() const;
/// @brief Write bytes directly into the tensor
/// @param p Pointer to source of data
/// @param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
/// @param n Number of bytes to write, must be integral number of elements.
void write(const void* p, size_t tensor_offset, size_t n) override;
/// @brief Read bytes directly from the tensor
/// @param p Pointer to destination for data
/// @param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
/// @param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t tensor_offset, size_t n) const override;
private:
char* m_allocated; // raw allocation returned by allocate_aligned_buffer; this is what gets freed
char* m_buffer; // aligned pointer into m_allocated; element data lives here
size_t m_buffer_size; // size of the element-data region, in bytes
};
......@@ -179,6 +179,8 @@ namespace ngraph
const vtype get_vector() const { return m_elements; }
operator const vtype() const { return m_elements; }
operator vtype() { return m_elements; }
void* data() { return m_elements.data(); }
const void* data() const { return m_elements.data(); }
bool operator==(const NDArrayBase<T>& other) const
{
return m_shape == other.m_shape && m_elements == other.m_elements;
......
......@@ -18,8 +18,11 @@
#include <vector>
#include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/ndarray.hpp"
#include "ngraph/runtime/value.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
......@@ -42,7 +45,6 @@ namespace ngraph
}
public:
TensorView() {}
virtual ~TensorView() {}
template <typename ET>
ParameterizedTensorView<ET>* get_parameterized_tensor_view()
......@@ -75,6 +77,29 @@ namespace ngraph
/// @param n Number of bytes to read, must be integral number of elements.
virtual void read(void* p, size_t tensor_offset, size_t n) const = 0;
// This is for unit test only
// Unit-test convenience: element-wise equality against an NDArray.
// True iff the shapes match and the tensor's contents, read back through
// read(), compare equal to the NDArray's elements.
// NOTE(review): assumes T matches the tensor's stored element type
// (byte-for-byte), since the comparison is done via a raw read — confirm at
// call sites.
template <typename T>
bool operator==(const NDArrayBase<T>& ndarray) const
{
bool rc = false;
if (get_shape() == ndarray.get_shape())
{
std::vector<T> lhs(ndarray.get_vector().size());
read(lhs.data(), 0, ndarray.get_vector().size() * sizeof(T));
rc = (lhs == ndarray.get_vector());
}
return rc;
}
// Copy the full tensor contents out as a std::vector<T>.
// NOTE(review): assumes T matches the tensor's stored element type — the
// byte count is computed as shape_size * sizeof(T); confirm at call sites.
template <typename T>
std::vector<T> get_vector()
{
size_t element_count = shape_size(get_shape());
size_t size = element_count * sizeof(T);
std::vector<T> rc(element_count);
read(rc.data(), 0, size);
return rc;
}
protected:
std::shared_ptr<ngraph::descriptor::TensorView> m_descriptor;
};
......
......@@ -27,9 +27,10 @@ namespace ngraph
{
public:
TensorViewInfo(size_t index,
const std::shared_ptr<const ngraph::descriptor::TensorView>& descriptor)
std::shared_ptr<const ngraph::descriptor::TensorView> descriptor)
: m_index(index)
, m_layout(descriptor->get_tensor_view_layout())
, m_tensor_view(descriptor)
{
}
......@@ -46,9 +47,20 @@ namespace ngraph
return std::static_pointer_cast<LT>(m_layout);
}
// The tensor-view descriptor this info object was constructed from.
std::shared_ptr<const ngraph::descriptor::TensorView> get_tensor_view() const
{
return m_tensor_view;
}
// The underlying descriptor Tensor (e.g. for its generated name).
const ngraph::descriptor::Tensor& get_tensor() const
{
return m_tensor_view->get_tensor();
}
protected:
size_t m_index;
std::shared_ptr<ngraph::descriptor::layout::TensorViewLayout> m_layout;
std::shared_ptr<const ngraph::descriptor::TensorView> m_tensor_view;
};
}
}
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment