Unverified Commit 6f511762 authored by Robert Kimball, committed by GitHub

Interpreter rework (#2030)

* all tests passing

* rename a few vars to be consistent with new tensor names
parent b52a7798
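
Note on the change (not part of the commit message): the hunks below replace per-op `HostTensor` allocations in the interpreter's `call()` path with a single aligned memory pool. `pass::MemoryLayout` assigns each temporary tensor a pool offset at compile time, `compile()` allocates one `AlignedBuffer` of `get_temporary_pool_size()` bytes, and `call()` maps `descriptor::Tensor*` to raw `void*` pointers into that pool instead of creating tensors on the fly. The sketch below illustrates the pattern under stated assumptions; `TemporaryPool` and `TensorDesc` are illustrative stand-ins, not nGraph classes.

```cpp
// Minimal sketch of the pooled-tensor pattern, assuming 64-byte alignment as
// in the diff. TemporaryPool and TensorDesc stand in for nGraph's
// AlignedBuffer and descriptor::Tensor; they are not real nGraph APIs.
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-in for descriptor::Tensor: a name, a byte size, and the pool offset
// assigned ahead of time by a memory-layout pass.
struct TensorDesc
{
    std::string name;
    size_t size_in_bytes;
    size_t pool_offset;
};

// Stand-in for AlignedBuffer: one allocation sized for the whole function.
class TemporaryPool
{
public:
    TemporaryPool(size_t size, size_t alignment)
        : m_storage(size + alignment)
    {
        void* p = m_storage.data();
        size_t space = m_storage.size();
        m_base = std::align(alignment, size, p, space);
    }

    // Plays the role of FunctionInstance::get_temporary_pointer(offset).
    void* get_temporary_pointer(size_t offset)
    {
        return static_cast<char*>(m_base) + offset;
    }

private:
    std::vector<char> m_storage;
    void* m_base = nullptr;
};

int main()
{
    // Two intermediate tensors laid out back to back in the pool.
    TensorDesc relu_out{"relu_out", 4 * sizeof(float), 0};
    TensorDesc add_out{"add_out", 4 * sizeof(float), 64};

    TemporaryPool pool(128, 64);

    // The call path keeps a map from tensor descriptor to raw pointer,
    // mirroring unordered_map<descriptor::Tensor*, void*> tensor_map.
    std::unordered_map<TensorDesc*, void*> tensor_map;
    tensor_map[&relu_out] = pool.get_temporary_pointer(relu_out.pool_offset);
    tensor_map[&add_out] = pool.get_temporary_pointer(add_out.pool_offset);

    // A kernel writes straight into pool memory; no per-op HostTensor allocation.
    float* out = static_cast<float*>(tensor_map.at(&relu_out));
    for (int i = 0; i < 4; ++i)
    {
        out[i] = static_cast<float>(i);
    }
    std::cout << relu_out.name << "[3] = " << out[3] << std::endl;
    return 0;
}
```

One consequence visible further down: since every temporary lives in the pool for the duration of the call, the old per-op "delete any obsolete tensors" loop over `liveness_free_list` is no longer needed and is removed.
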
@@ -24,6 +24,7 @@
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/util.hpp"
using namespace std;
@@ -31,6 +32,8 @@ using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
const int runtime::interpreter::INTBackend::m_alignment = 64;
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
@@ -63,8 +66,12 @@ bool runtime::interpreter::INTBackend::compile(shared_ptr<Function> function)
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>(m_alignment);
pass_manager.run_passes(function);
size_t memory_pool_size = function->get_temporary_pool_size();
instance.m_temporary_memory.reset(new AlignedBuffer(memory_pool_size, m_alignment));
for (const shared_ptr<Node>& node : function->get_ordered_ops())
{
instance.m_wrapped_nodes.emplace_back(node);
@@ -84,32 +91,36 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
FunctionInstance& instance = m_function_map[function];
// convert inputs to HostTensor
vector<shared_ptr<runtime::HostTensor>> func_inputs;
for (auto tv : inputs)
vector<void*> func_inputs;
vector<shared_ptr<runtime::HostTensor>> htv_inputs;
for (auto tensor : inputs)
{
func_inputs.push_back(static_pointer_cast<runtime::HostTensor>(tv));
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_inputs.push_back(static_cast<void*>(host_tensor->get_data_ptr()));
htv_inputs.push_back(host_tensor);
}
if (instance.m_nan_check_enabled)
{
perform_nan_check(func_inputs);
perform_nan_check(htv_inputs);
}
// convert outputs to HostTensor
vector<shared_ptr<runtime::HostTensor>> func_outputs;
for (auto tv : outputs)
vector<void*> func_outputs;
for (auto tensor : outputs)
{
func_outputs.push_back(static_pointer_cast<runtime::HostTensor>(tv));
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_outputs.push_back(static_cast<void*>(host_tensor->get_data_ptr()));
}
// map function params -> HostTensor
unordered_map<descriptor::Tensor*, shared_ptr<runtime::HostTensor>> tensor_map;
unordered_map<descriptor::Tensor*, void*> tensor_map;
size_t input_count = 0;
for (auto param : function->get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
descriptor::Tensor* tv = param->get_output_tensor_ptr(i).get();
tensor_map.insert({tv, func_inputs[input_count++]});
descriptor::Tensor* tensor = param->get_output_tensor_ptr(i).get();
tensor_map.insert({tensor, func_inputs[input_count++]});
}
}
@@ -121,8 +132,8 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
{
throw ngraph_error("One of function's outputs isn't op::Result");
}
descriptor::Tensor* tv = output->get_output_tensor_ptr(0).get();
tensor_map.insert({tv, func_outputs[output_count]});
descriptor::Tensor* tensor = output->get_output_tensor_ptr(0).get();
tensor_map.insert({tensor, func_outputs[output_count]});
}
// for each ordered op in the graph
@@ -134,35 +145,42 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
{
continue;
}
if (type_id == OP_TYPEID::Constant)
{
const op::Constant* c = static_cast<const op::Constant*>(op);
descriptor::Tensor* tensor = op->get_output_tensor_ptr(0).get();
tensor_map.insert({tensor, const_cast<void*>(c->get_data_ptr())});
continue;
}
// get op inputs from map
vector<shared_ptr<runtime::HostTensor>> op_inputs;
vector<const void*> op_inputs;
for (const descriptor::Input& input : op->get_inputs())
{
descriptor::Tensor* tv = input.get_output().get_tensor_ptr().get();
op_inputs.push_back(tensor_map.at(tv));
descriptor::Tensor* tensor = input.get_output().get_tensor_ptr().get();
op_inputs.push_back(tensor_map.at(tensor));
}
// get op outputs from map or create
vector<shared_ptr<runtime::HostTensor>> op_outputs;
vector<void*> op_outputs;
vector<shared_ptr<runtime::HostTensor>> htv_outputs;
for (size_t i = 0; i < op->get_output_size(); ++i)
{
descriptor::Tensor* tv = op->get_output_tensor_ptr(i).get();
shared_ptr<runtime::HostTensor> htv;
auto it = tensor_map.find(tv);
descriptor::Tensor* tensor = op->get_output_tensor_ptr(i).get();
void* host_tensor = nullptr;
auto it = tensor_map.find(tensor);
if (it == tensor_map.end())
{
// the output tensor is not in the tensor map so create a new tensor
const Shape& shape = op->get_output_shape(i);
const element::Type& type = op->get_output_element_type(i);
string name = op->get_output_tensor(i).get_name();
htv = make_shared<runtime::HostTensor>(type, shape, name);
tensor_map.insert({tv, htv});
auto offset = op->get_output_tensor(i).get_pool_offset();
host_tensor = instance.get_temporary_pointer(offset);
tensor_map.insert({tensor, host_tensor});
}
else
{
htv = it->second;
host_tensor = it->second;
}
op_outputs.push_back(htv);
op_outputs.push_back(host_tensor);
htv_outputs.push_back(make_shared<runtime::HostTensor>(
tensor->get_element_type(), tensor->get_shape(), host_tensor));
}
// get op type
@@ -202,20 +220,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
}
if (instance.m_nan_check_enabled)
{
perform_nan_check(op_outputs, op);
}
// delete any obsolete tensors
for (const descriptor::Tensor* t : op->liveness_free_list)
{
for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it)
{
if (it->second->get_name() == t->get_name())
{
tensor_map.erase(it);
break;
}
}
perform_nan_check(htv_outputs, op);
}
}
@@ -224,8 +229,8 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
void runtime::interpreter::INTBackend::generate_calls(const element::Type& type,
const NodeWrapper& op,
const vector<shared_ptr<HostTensor>>& outputs,
const vector<shared_ptr<HostTensor>>& inputs,
const vector<void*>& outputs,
const vector<const void*>& inputs,
FunctionInstance& instance)
{
if (type == element::boolean)
@@ -307,17 +312,17 @@ vector<runtime::PerformanceCounter>
return rc;
}
void runtime::interpreter::INTBackend::perform_nan_check(const vector<shared_ptr<HostTensor>>& tvs,
const Node* op)
void runtime::interpreter::INTBackend::perform_nan_check(
const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
{
size_t arg_number = 1;
for (shared_ptr<HostTensor> tv : tvs)
for (const shared_ptr<HostTensor>& tensor : tensors)
{
const element::Type& type = tv->get_element_type();
const element::Type& type = tensor->get_element_type();
if (type == element::f32)
{
const float* data = tv->get_data_ptr<float>();
for (size_t i = 0; i < tv->get_element_count(); i++)
const float* data = tensor->get_data_ptr<float>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
@@ -335,8 +340,8 @@ void runtime::interpreter::INTBackend::perform_nan_check(const vector<shared_ptr
}
else if (type == element::f64)
{
const double* data = tv->get_data_ptr<double>();
for (size_t i = 0; i < tv->get_element_count(); i++)
const double* data = tensor->get_data_ptr<double>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
......
@@ -34,7 +34,7 @@ namespace ngraph
}
}
template <typename T>
void relu_backprop(const T* arg, T* delta_arg, T* out, size_t count)
void relu_backprop(const T* arg, const T* delta_arg, T* out, size_t count)
{
T zero = 0;
for (size_t i = 0; i < count; i++)
......
@@ -34,7 +34,7 @@ namespace ngraph
const Shape& arg_shape,
size_t batch_axis,
size_t sequence_axis,
U* sequence_lengths)
const U* sequence_lengths)
{
CoordinateTransform input_transform(arg_shape);
for (const Coordinate& in_coord : input_transform)
......
@@ -37,7 +37,7 @@ namespace ngraph
}
template <typename T>
void sigmoid_backprop(const T* arg, T* delta_arg, T* out, size_t count)
void sigmoid_backprop(const T* arg, const T* delta_arg, T* out, size_t count)
{
T exp_value;
T func_x;
......
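
The last three hunks const-qualify the read-only inputs of the reference kernels (`relu_backprop`, `reverse_sequence`, `sigmoid_backprop`) so they accept the `const void*` inputs the interpreter now passes without casts. The sketch below repeats the const-correct `relu_backprop` signature with a small driver; the loop body is a plausible reconstruction, since the diff elides it.

```cpp
// Standalone copy of the const-qualified relu_backprop signature from the
// hunk above. The loop body is a plausible reconstruction (elided in the
// diff), shown only to illustrate why delta_arg can be const.
#include <cstddef>
#include <iostream>
#include <vector>

template <typename T>
void relu_backprop(const T* arg, const T* delta_arg, T* out, size_t count)
{
    T zero = 0;
    for (size_t i = 0; i < count; i++)
    {
        // Gradient passes through only where the forward input was positive.
        out[i] = arg[i] > zero ? delta_arg[i] : zero;
    }
}

int main()
{
    std::vector<float> arg{-1.0f, 2.0f, 0.0f, 3.0f};
    std::vector<float> delta{1.0f, 1.0f, 1.0f, 1.0f};
    std::vector<float> out(4);
    // data() pointers bind to the const parameters without any casting.
    relu_backprop<float>(arg.data(), delta.data(), out.data(), out.size());
    for (float v : out)
    {
        std::cout << v << " ";
    }
    std::cout << std::endl;
    return 0;
}
```
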