Unverified Commit b8419c35 authored by Ashok Emani, committed by GitHub

extend executable create tensor APIs (#4163)

* style apply

* update CPUTensorView with memory_pointers

* add wait_to_read and wait_to_write

* remove nullptr default and add second set of APIs

* fix int_executable APIs
Co-authored-by: Scott Cyphers <diyessi@users.noreply.github.com>
parent 2651f738
......@@ -231,6 +231,14 @@ shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_input_tensor(si
parameter->get_shape());
}
shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_input_tensor(size_t input_index,
                                                                              void* memory_pointer)
{
    // Build a tensor view over the caller-owned buffer for the requested input.
    // Buffer lifetime is the caller's responsibility (see Executable docs).
    auto param = get_parameter(input_index);
    const auto& element_type = param->get_element_type();
    const auto& shape = param->get_shape();
    return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
}
shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index)
{
shared_ptr<op::Result> result = get_result(output_index);
......@@ -238,16 +246,37 @@ shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_output_tensor(s
result->get_shape());
}
shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index,
                                                                               void* memory_pointer)
{
    // Build a tensor view over the caller-owned buffer for the requested output.
    // Buffer lifetime is the caller's responsibility (see Executable docs).
    auto res = get_result(output_index);
    const auto& element_type = res->get_element_type();
    const auto& shape = res->get_shape();
    return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
}
vector<shared_ptr<runtime::Tensor>>
    runtime::cpu::CPU_Executable::create_input_tensor(size_t input_index, size_t pipeline_depth)
{
    // No user-supplied buffers: forward to the three-argument overload with an
    // empty pointer list so every pipeline stage allocates its own storage.
    return create_input_tensor(input_index, pipeline_depth, {});
}
vector<shared_ptr<runtime::Tensor>> runtime::cpu::CPU_Executable::create_input_tensor(
    size_t input_index, size_t pipeline_depth, std::vector<void*> memory_pointers)
{
    // Fix: the pointer count was previously narrowed into a bool, so the
    // NGRAPH_CHECK below compared pipeline_depth against 0/1 and fired
    // spuriously for any pipeline_depth > 1 even when the caller supplied a
    // matching number of buffers. Keep the count in a size_t.
    size_t mem_ptr_count = memory_pointers.size();
    if (mem_ptr_count > 0)
    {
        NGRAPH_CHECK(pipeline_depth == mem_ptr_count,
                     "create_input_tensor mismatch in pipeline_depth and memory_pointers");
    }
    vector<shared_ptr<runtime::cpu::CPUTensorView>> tensors;
    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        // One tensor view per pipeline stage: wrap the caller's buffer when
        // provided, otherwise let the tensor allocate its own storage.
        // (make_shared already yields a CPUTensorView pointer; the previous
        // static_pointer_cast through a temporary was redundant.)
        tensors.push_back(
            make_shared<runtime::cpu::CPUTensorView>(parameter->get_element_type(),
                                                     parameter->get_shape(),
                                                     mem_ptr_count > 0 ? memory_pointers[i]
                                                                       : nullptr));
    }
......@@ -262,13 +291,26 @@ vector<shared_ptr<runtime::Tensor>>
vector<shared_ptr<runtime::Tensor>>
    runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index, size_t pipeline_depth)
{
    // No user-supplied buffers: forward to the three-argument overload with an
    // empty pointer list so every pipeline stage allocates its own storage.
    return create_output_tensor(output_index, pipeline_depth, {});
}
vector<shared_ptr<runtime::Tensor>> runtime::cpu::CPU_Executable::create_output_tensor(
    size_t output_index, size_t pipeline_depth, std::vector<void*> memory_pointers)
{
    // Fix: the pointer count was previously narrowed into a bool, so the
    // NGRAPH_CHECK below compared pipeline_depth against 0/1 and fired
    // spuriously for any pipeline_depth > 1 even when the caller supplied a
    // matching number of buffers. Keep the count in a size_t.
    size_t mem_ptr_count = memory_pointers.size();
    if (mem_ptr_count > 0)
    {
        NGRAPH_CHECK(pipeline_depth == mem_ptr_count,
                     "create_output_tensor mismatch in pipeline_depth and memory_pointers");
    }
    vector<shared_ptr<runtime::cpu::CPUTensorView>> tensors;
    shared_ptr<op::Result> result = get_result(output_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        // One tensor view per pipeline stage: wrap the caller's buffer when
        // provided, otherwise let the tensor allocate its own storage.
        // (make_shared already yields a CPUTensorView pointer; the previous
        // static_pointer_cast through a temporary was redundant.)
        tensors.push_back(
            make_shared<runtime::cpu::CPUTensorView>(result->get_element_type(),
                                                     result->get_shape(),
                                                     mem_ptr_count > 0 ? memory_pointers[i]
                                                                       : nullptr));
    }
......
......@@ -96,14 +96,30 @@ namespace ngraph
// Tensor factories overriding runtime::Executable.
// The memory_pointer / memory_pointers overloads wrap caller-owned buffers
// instead of having the backend allocate storage; buffer lifetime is the
// caller's responsibility (see runtime::Executable documentation).
std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index) override;
std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index,
void* memory_pointer) override;
std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index) override;
std::shared_ptr<runtime::Tensor>
create_output_tensor(size_t output_index, void* memory_pointer) override;
// Pipelined variants: one tensor per pipeline stage. When memory_pointers is
// non-empty, its size must match pipeline_depth (enforced via NGRAPH_CHECK in
// the implementation).
std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index,
size_t pipeline_depth,
std::vector<void*> memory_pointers) override;
std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index, size_t pipeline_depth) override;
std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth) override;
std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index,
size_t pipeline_depth,
std::vector<void*> memory_pointers) override;
private:
// Helpers mapping an input/output index to the graph node behind it.
std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
std::shared_ptr<ngraph::op::Result> get_result(size_t index) const;
......
......@@ -145,20 +145,44 @@ shared_ptr<runtime::Tensor> runtime::Executable::create_input_tensor(size_t /* i
throw runtime_error("create_input_tensor unimplemented");
}
// Default implementations of the tensor-factory APIs on the base class.
// runtime::Executable itself cannot create tensors; backends that support a
// given overload (e.g. CPU_Executable) override it. A backend without an
// override fails loudly with runtime_error rather than misbehaving silently.
shared_ptr<runtime::Tensor> runtime::Executable::create_input_tensor(size_t /* input_index */,
void* /* memory_pointer */)
{
throw runtime_error("create_input_tensor unimplemented");
}
shared_ptr<runtime::Tensor> runtime::Executable::create_output_tensor(size_t /* output_index */)
{
throw runtime_error("create_output_tensor unimplemented");
}
shared_ptr<runtime::Tensor> runtime::Executable::create_output_tensor(size_t /* output_index */,
void* /* memory_pointer */)
{
throw runtime_error("create_output_tensor unimplemented");
}
// Pipelined variants (one tensor per pipeline stage) — also backend-specific.
vector<shared_ptr<runtime::Tensor>>
runtime::Executable::create_input_tensor(size_t /* input_index */, size_t /* pipeline_depth */)
{
throw runtime_error("create_input_tensor unimplemented");
}
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_input_tensor(
size_t /* input_index */, size_t /* pipeline_depth */, std::vector<void*> /* memory_pointer */)
{
throw runtime_error("create_input_tensor unimplemented");
}
vector<shared_ptr<runtime::Tensor>>
runtime::Executable::create_output_tensor(size_t /* output_index */,
size_t /* pipeline_depth */)
{
throw runtime_error("create_output_tensor unimplemented");
}
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_output_tensor(
size_t /* output_index */, size_t /* pipeline_depth */, std::vector<void*> /* memory_pointer */)
{
throw runtime_error("create_output_tensor unimplemented");
}
......@@ -91,12 +91,32 @@ public:
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index);
/// \brief Create an input Tensor
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
/// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
/// must be sufficient to contain the tensor. The lifetime of the buffer is the
/// responsibility of the caller and must outlive the created Tensor.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index,
void* memory_pointer);
/// \brief Create an output Tensor
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index);
/// \brief Create an output Tensor
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
/// must be sufficient to contain the tensor. The lifetime of the buffer is the
/// responsibility of the caller and must outlive the created Tensor.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index,
void* memory_pointer);
/// \brief Create a vector of input Tensors
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
......@@ -106,6 +126,18 @@ public:
virtual std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index, size_t pipeline_depth);
/// \brief Create a vector of input Tensors backed by caller-owned buffers
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
/// \param pipeline_depth The number of stages in the input pipeline. For double-buffered input
/// you would specify pipeline_depth=2
/// \param memory_pointers A vector of pointers to buffers used for these tensors, one per
/// pipeline stage. Each buffer must be large enough to contain its tensor. The lifetime
/// of the buffers is the responsibility of the caller and must outlive the created Tensors.
/// \returns A vector of Tensors, one for each stage of the pipeline
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_input_tensor(
size_t input_index, size_t pipeline_depth, std::vector<void*> memory_pointers);
/// \brief Create a vector of output Tensors
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
......@@ -115,6 +147,18 @@ public:
virtual std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth);
/// \brief Create a vector of output Tensors backed by caller-owned buffers
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \param pipeline_depth The number of stages in the output pipeline. For double-buffered
/// output you would specify pipeline_depth=2
/// \param memory_pointers A vector of pointers to buffers used for these tensors, one per
/// pipeline stage. Each buffer must be large enough to contain its tensor. The lifetime
/// of the buffers is the responsibility of the caller and must outlive the created Tensors.
/// \returns A vector of Tensors, one for each stage of the pipeline
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_output_tensor(
size_t output_index, size_t pipeline_depth, std::vector<void*> memory_pointers);
protected:
/// \brief Called at the end of compile to the values to be returned by get_parameters
/// and get_results
......
......@@ -103,6 +103,12 @@ namespace ngraph
/// \param n Number of bytes to read, must be integral number of elements.
virtual void read(void* p, size_t n) const = 0;
/// \brief Synchronization hook called before reading tensor data; may block.
/// Backends may override this to ensure the tensor contents are up to date
/// (e.g. lazy evaluation). Default is a no-op.
virtual void wait_for_read_ready() {}
/// \brief Synchronization hook called before writing tensor data; may block.
/// Backends may override this and treat the call as an indication that new
/// data is about to arrive in the tensor. Default is a no-op.
/// NOTE(review): the original comment said "notify tensor of new data" for a
/// function named wait_for_write_ready — confirm intended wait-vs-notify
/// semantics with the backend implementations.
virtual void wait_for_write_ready() {}
/// \brief copy bytes directly from source to this tensor
/// \param source The source tensor
virtual void copy_from(const ngraph::runtime::Tensor& source) NGRAPH_DEPRECATED(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment