[PRIVATE] Jbobba/memory allocator (#2707)

* Change Allocator arguments to raw pointers
* Change backend API to return a single device allocator object

[PRIVATE] Jbobba/memory allocator (#2707)
* Change Allocator arguments to raw pointers
* Change backend API to return a single device allocator object
23c3d356 · Jayaram Bobba · GitHub · 58acd894 · 23c3d356 · 23c3d356
Unverified Commit 23c3d356 authored Apr 04, 2019 by Jayaram Bobba Committed by GitHub Apr 04, 2019
12 changed files
--- a/src/ngraph/runtime/aligned_buffer.cpp
+++ b/src/ngraph/runtime/aligned_buffer.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //*****************************************************************************

+#include <iostream>
 #include <memory>

 #include "ngraph/runtime/aligned_buffer.hpp"
@@ -21,6 +22,7 @@
 #include "ngraph/util.hpp"

 using namespace ngraph;
+using namespace std;

 runtime::AlignedBuffer::AlignedBuffer()
    : m_allocator(nullptr)
@@ -30,22 +32,27 @@ runtime::AlignedBuffer::AlignedBuffer()
 {
 }

-runtime::AlignedBuffer::AlignedBuffer(size_t byte_size,
-                                      size_t alignment,
-                                      std::shared_ptr<ngraph::runtime::Allocator> allocator)
+// Allocates byte_size bytes from allocator with the requested alignment (when byte_size > 0)
+// and throws ngraph_error if the returned pointer is not aligned as requested.
+runtime::AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment, Allocator* allocator)
 {
    m_byte_size = byte_size;
    m_allocator = allocator;
    if (m_byte_size > 0)
    {
-        size_t allocation_size = m_byte_size + alignment;
-        m_allocated_buffer = static_cast<char*>(m_allocator->Malloc(allocation_size, alignment));
+        m_allocated_buffer = static_cast<char*>(m_allocator->Malloc(m_byte_size, alignment));
        m_aligned_buffer = m_allocated_buffer;
        size_t mod = size_t(m_aligned_buffer) % alignment;

        if (mod != 0)
        {
-            m_aligned_buffer += (alignment - mod);
+            // Stream the address as an integer; a char* would be printed as a C string.
+            ostringstream os;
+            os << hex << size_t(m_aligned_buffer);
+            // The destructor never runs for a throwing constructor, so release the block here.
+            m_allocator->Free(m_allocated_buffer);
+            throw ngraph_error("Incorrect alignment on newly allocated buffer at address: 0x" +
+                               os.str() + ". Expected alignment: " + to_string(alignment));
        }
    }
    else

--- a/src/ngraph/runtime/aligned_buffer.hpp
+++ b/src/ngraph/runtime/aligned_buffer.hpp
@@ -19,12 +19,13 @@
 #include <cstddef>
 #include <memory>

+#include "ngraph/runtime/allocator.hpp"
+
 namespace ngraph
 {
    namespace runtime
    {
        class AlignedBuffer;
-        class Allocator;
    }
 }

@@ -36,8 +37,7 @@ class ngraph::runtime::AlignedBuffer
 public:
    AlignedBuffer(size_t byte_size,
                  size_t alignment,
-                  std::shared_ptr<ngraph::runtime::Allocator> allocator =
-                      std::make_shared<runtime::Allocator>());
+                  Allocator* allocator = get_ngraph_allocator());
    AlignedBuffer();
    ~AlignedBuffer();

@@ -49,7 +49,7 @@ private:
    AlignedBuffer(AlignedBuffer&&) = delete;
    AlignedBuffer& operator=(const AlignedBuffer&) = delete;

-    std::shared_ptr<ngraph::runtime::Allocator> m_allocator;
+    Allocator* m_allocator;
    char* m_allocated_buffer;
    char* m_aligned_buffer;
    size_t m_byte_size;

--- a/src/ngraph/runtime/allocator.cpp
+++ b/src/ngraph/runtime/allocator.cpp
@@ -16,22 +16,37 @@

 #include "ngraph/runtime/allocator.hpp"

-void* ngraph::runtime::Allocator::Malloc(size_t size, size_t alignment)
+/// \brief Default allocator; forwards to ngraph::aligned_alloc and ngraph::aligned_free
+class DefaultNgraphAllocator final : public ngraph::runtime::Allocator
 {
-    void* ptr = ngraph::aligned_alloc(alignment, size);
-
-    // check for exception
-    if (!ptr)
+public:
+    /// \throws ngraph::ngraph_error if the underlying allocation returns null
+    void* Malloc(size_t size, size_t alignment) override
    {
-        throw ngraph_error("malloc failed to allocate memory of size " + std::to_string(size));
+        void* ptr = ngraph::aligned_alloc(alignment, size);
+
+        // a null result is treated as an allocation failure
+        if (!ptr)
+        {
+            throw ngraph::ngraph_error("malloc failed to allocate memory of size " +
+                                       std::to_string(size));
+        }
+        return ptr;
    }
-    return ptr;
-}

-void ngraph::runtime::Allocator::Free(void* ptr)
-{
-    if (ptr)
+    /// \brief Releases memory obtained from Malloc; a null ptr is ignored
+    void Free(void* ptr) override
    {
-        ngraph::aligned_free(ptr);
+        if (ptr)
+        {
+            ngraph::aligned_free(ptr);
+        }
    }
+};
+
+ngraph::runtime::Allocator* ngraph::runtime::get_ngraph_allocator()
+{
+    // Function-local static: constructed on the first call; nothing here deletes it.
+    static DefaultNgraphAllocator* allocator = new DefaultNgraphAllocator();
+    return allocator;
 }
--- a/src/ngraph/runtime/allocator.hpp
+++ b/src/ngraph/runtime/allocator.hpp
@@ -27,20 +27,23 @@ namespace ngraph
    namespace runtime
    {
        class Allocator;
+        /// \brief Returns the default ngraph allocator (a single shared instance)
+        ///        that calls into system allocation libraries
+        Allocator* get_ngraph_allocator();
    }
 }

-/// \brief Abstract class for the allocator, for allocating and deallocating device memory
+/// \brief Abstract class for the allocator
 class ngraph::runtime::Allocator
 {
 public:
    virtual ~Allocator() = default;
-    /// \brief allocates the memory on the device with the given size and alignment requirement
+    /// \brief allocates memory with the given size and alignment requirement
    /// \param size exact size of bytes to allocate
    /// \param alignment specifies the alignment. Must be a valid alignment supported by the implementation.
-    virtual void* Malloc(size_t size, size_t alignment);
+    virtual void* Malloc(size_t size, size_t alignment) = 0;

    /// \brief deallocates the memory pointed by ptr
    /// \param ptr pointer to the aligned memory to be released
-    virtual void Free(void* ptr);
+    virtual void Free(void* ptr) = 0;
 };
--- a/src/ngraph/runtime/backend.cpp
+++ b/src/ngraph/runtime/backend.cpp
@@ -69,32 +69,6 @@ void runtime::Backend::remove_compiled_function(std::shared_ptr<Executable> exec
 {
 }

-std::shared_ptr<ngraph::runtime::Allocator> runtime::Backend::get_framework_memory_allocator()
-{
-    return nullptr;
-}
-
-void runtime::Backend::set_framework_memory_allocator(
-    const std::shared_ptr<ngraph::runtime::Allocator>& allocator)
-{
-    // override this method from all supported backends to set its memory allocator to
-    // framework passed memory allocator
-}
-
-ngraph::runtime::AllocateFunc runtime::Backend::get_device_memory_alloc()
-{
-    // override this method from all supported backends to return memory allocator
-    // which allocates device pinned memory
-    return nullptr;
-}
-
-ngraph::runtime::DestroyFunc runtime::Backend::get_device_memory_dealloc()
-{
-    // override this method from all supported backends to return memory de-allocator
-    // which de-allocates device pinned memory
-    return nullptr;
-}
-
 bool runtime::Backend::is_device_memory(void* ptr)
 {
    // override this method for each supported backend to determine if the passed pointer is in

--- a/src/ngraph/runtime/backend.hpp
+++ b/src/ngraph/runtime/backend.hpp
@@ -31,10 +31,6 @@ namespace ngraph
 {
    namespace runtime
    {
-        // aliases for framework provided function pointers as defined in onnx runtime
-        using AllocateFunc = void* (*)(void*, size_t, size_t);
-        using DestroyFunc = void (*)(void*, void*);
-
        class Tensor;
        class Backend;
    }
@@ -124,19 +120,21 @@ public:
    // \returns a shared pointer to the op if found, else nullptr
    virtual std::shared_ptr<ngraph::Node> get_backend_op(const std::string& op_name, ...);

-    /// \brief method which returns the framework passed memory allocator
-    virtual std::shared_ptr<ngraph::runtime::Allocator> get_framework_memory_allocator();
-
-    /// \brief method for the framework to set its memory allocator object in the backend.
-    /// \param allocator reference to framework memory allocator object
-    virtual void set_framework_memory_allocator(
-        const std::shared_ptr<ngraph::runtime::Allocator>& allocator);
+    /// \brief Returns memory allocator used by backend for host allocations
+    virtual Allocator* get_host_memory_allocator()
+    {
+        return ngraph::runtime::get_ngraph_allocator();
+    };

-    /// \brief method to return memory de-allocator which de-allocates device pinned memory
-    virtual ngraph::runtime::AllocateFunc get_device_memory_alloc();
+    /// \brief Set the host memory allocator to be used by the backend; this default ignores it
+    /// \param allocator pointer to host memory allocator object
+    virtual void set_host_memory_allocator(Allocator* allocator){};

-    /// \brief method to return memory allocator which allocates device pinned memory
-    virtual ngraph::runtime::DestroyFunc get_device_memory_dealloc();
+    /// \brief Returns memory allocator used by backend for device allocations
+    virtual Allocator* get_device_memory_allocator()
+    {
+        return ngraph::runtime::get_ngraph_allocator();
+    };

    /// \brief method for each supported backend to determine if the passed pointer is in device pinned memory or not
    /// \param ptr pointer to the memory to determine if its in device memory or not

--- a/src/ngraph/runtime/cpu/cpu_backend.cpp
+++ b/src/ngraph/runtime/cpu/cpu_backend.cpp
@@ -50,18 +50,10 @@ namespace
    } s_cpu_static_init;
 }

-runtime::cpu::CPU_Backend::CPU_Backend()
-{
-    m_allocator = make_shared<ngraph::runtime::Allocator>();
-}
-
-runtime::cpu::CPU_Backend::~CPU_Backend()
-{
-}
 shared_ptr<runtime::cpu::CPU_CallFrame> runtime::cpu::CPU_Backend::make_call_frame(
    const shared_ptr<runtime::cpu::CPU_ExternalFunction>& external_function,
    ngraph::pass::PassConfig& pass_config,
-    std::shared_ptr<ngraph::runtime::Allocator> allocator)
+    Allocator* allocator)
 {
    return external_function->make_call_frame(pass_config, allocator);
 }
@@ -72,17 +64,6 @@ shared_ptr<runtime::Tensor>
    return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, this);
 }

-shared_ptr<ngraph::runtime::Allocator> runtime::cpu::CPU_Backend::get_framework_memory_allocator()
-{
-    return m_allocator;
-}
-
-void runtime::cpu::CPU_Backend::set_framework_memory_allocator(
-    const std::shared_ptr<ngraph::runtime::Allocator>& allocator)
-{
-    m_allocator = allocator;
-}
-
 shared_ptr<runtime::Tensor> runtime::cpu::CPU_Backend::create_tensor(
    const element::Type& element_type, const Shape& shape, void* memory_pointer)
 {
@@ -110,7 +91,7 @@ shared_ptr<runtime::Executable>
    else
    {
        rc = make_shared<CPU_Executable>(
-            func, pass_config, m_allocator, performance_counters_enabled);
+            func, pass_config, get_host_memory_allocator(), performance_counters_enabled);
        m_exec_map.insert({func, rc});
    }
    return rc;
@@ -118,7 +99,7 @@ shared_ptr<runtime::Executable>

 runtime::cpu::CPU_Executable::CPU_Executable(shared_ptr<Function> func,
                                             ngraph::pass::PassConfig& pass_config,
-                                             std::shared_ptr<ngraph::runtime::Allocator> allocator,
+                                             Allocator* allocator,
                                             bool performance_counters_enabled)
 {
    FunctionInstance& instance = m_function_instance;
@@ -167,6 +148,23 @@ void runtime::cpu::CPU_Backend::remove_compiled_function(shared_ptr<Executable>
    }
 }

+// Returns the allocator installed through set_host_memory_allocator(), or the
+// default ngraph allocator when none has been installed.
+runtime::Allocator* runtime::cpu::CPU_Backend::get_host_memory_allocator()
+{
+    if (m_allocator)
+    {
+        return m_allocator;
+    }
+    // Nothing installed: the fallback must be returned, not merely called.
+    return runtime::get_ngraph_allocator();
+}
+
+void runtime::cpu::CPU_Backend::set_host_memory_allocator(runtime::Allocator* allocator)
+{
+    m_allocator = allocator;
+}
+
 vector<runtime::PerformanceCounter> runtime::cpu::CPU_Executable::get_performance_data() const
 {
    vector<runtime::PerformanceCounter> rc;

--- a/src/ngraph/runtime/cpu/cpu_backend.hpp
+++ b/src/ngraph/runtime/cpu/cpu_backend.hpp
@@ -36,12 +36,10 @@ namespace ngraph
            class CPU_BACKEND_API CPU_Backend : public runtime::Backend
            {
            public:
-                CPU_Backend();
-                ~CPU_Backend();
                std::shared_ptr<CPU_CallFrame>
                    make_call_frame(const std::shared_ptr<CPU_ExternalFunction>& external_function,
                                    ngraph::pass::PassConfig& pass_config,
-                                    std::shared_ptr<ngraph::runtime::Allocator> allocator);
+                                    Allocator* allocator);

                std::shared_ptr<ngraph::runtime::Tensor>
                    create_tensor(const ngraph::element::Type& element_type,
@@ -63,17 +61,16 @@ namespace ngraph

                void remove_compiled_function(std::shared_ptr<Executable> exec) override;

-                std::shared_ptr<ngraph::runtime::Allocator>
-                    get_framework_memory_allocator() override;
-                void set_framework_memory_allocator(
-                    const std::shared_ptr<ngraph::runtime::Allocator>& allocator) override;
+                Allocator* get_host_memory_allocator() override;
+                void set_host_memory_allocator(Allocator* allocator) override;
+
                bool is_supported(const Node& node) const override;
                bool is_supported_property(const Property prop) const override;

            private:
                std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Executable>>
                    m_exec_map;
-                std::shared_ptr<ngraph::runtime::Allocator> m_allocator = nullptr;
+                Allocator* m_allocator = nullptr; // non-owning; null selects the default
            };

            class CPU_BACKEND_API CPU_Executable : public runtime::Executable
@@ -81,7 +78,7 @@ namespace ngraph
            public:
                CPU_Executable(std::shared_ptr<Function> func,
                               ngraph::pass::PassConfig& pass_config,
-                               std::shared_ptr<ngraph::runtime::Allocator> allocator,
+                               Allocator* allocator,
                               bool performance_counters_enabled);
                bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
                          const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;

--- a/src/ngraph/runtime/cpu/cpu_call_frame.cpp
+++ b/src/ngraph/runtime/cpu/cpu_call_frame.cpp
@@ -33,7 +33,7 @@ runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction>
                                           InitContextFuncCG compiled_init_ctx_func,
                                           DestroyContextFuncCG compiled_destroy_ctx_func,
                                           EntryPoint compiled_function,
-                                           std::shared_ptr<ngraph::runtime::Allocator> allocator)
+                                           runtime::Allocator* allocator)
    : m_external_function(external_function)
    , m_compiled_init_ctx_func(compiled_init_ctx_func)
    , m_compiled_destroy_ctx_func(compiled_destroy_ctx_func)
@@ -126,8 +126,7 @@ void runtime::cpu::CPU_CallFrame::propagate_layouts(
    }
 }

-void runtime::cpu::CPU_CallFrame::setup_runtime_context(
-    std::shared_ptr<ngraph::runtime::Allocator> allocator)
+void runtime::cpu::CPU_CallFrame::setup_runtime_context(Allocator* allocator)
 {
    ctx = new CPURuntimeContext;
    ctx->pc = 0;

--- a/src/ngraph/runtime/cpu/cpu_call_frame.hpp
+++ b/src/ngraph/runtime/cpu/cpu_call_frame.hpp
@@ -22,6 +22,7 @@
 #include <vector>

 #include "ngraph/function.hpp"
+#include "ngraph/runtime/allocator.hpp"
 #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
 #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
 #include "ngraph/runtime/tensor.hpp"
@@ -58,7 +59,7 @@ namespace ngraph
                              InitContextFuncCG compiled_init_ctx_func,
                              DestroyContextFuncCG compiled_destroy_ctx_func,
                              EntryPoint compiled_function,
-                              std::shared_ptr<ngraph::runtime::Allocator> allocator);
+                              runtime::Allocator* allocator);
                ~CPU_CallFrame();

                /// \brief Invoke the function with values matching the signature of the function.
@@ -70,7 +71,7 @@ namespace ngraph
                void propagate_layouts(const std::vector<std::shared_ptr<runtime::Tensor>>& tvs,
                                       const LayoutDescriptorPtrs& layouts) const;

-                void setup_runtime_context(std::shared_ptr<ngraph::runtime::Allocator> allocator);
+                void setup_runtime_context(runtime::Allocator* allocator);
                void setup_cg_runtime_context();
                void cleanup_runtime_context();


--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -1716,8 +1716,9 @@ void*& runtime::cpu::CPU_ExternalFunction::get_tensor_data(const std::string& na
    }
 }

-shared_ptr<ngraph::runtime::cpu::CPU_CallFrame> runtime::cpu::CPU_ExternalFunction::make_call_frame(
-    ngraph::pass::PassConfig& pass_config, std::shared_ptr<ngraph::runtime::Allocator> allocator)
+shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
+    runtime::cpu::CPU_ExternalFunction::make_call_frame(ngraph::pass::PassConfig& pass_config,
+                                                        Allocator* allocator)
 {
 #if defined(NGRAPH_DEX_ONLY)
    if (pass_config.get_compilation_mode() == ngraph::pass::CompilationMode::CODEGEN)

--- a/src/ngraph/runtime/cpu/cpu_external_function.hpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.hpp
@@ -100,8 +100,7 @@ namespace ngraph
                                     bool release_function = true);
                ~CPU_ExternalFunction();
                std::shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
-                    make_call_frame(ngraph::pass::PassConfig& pass_config,
-                                    std::shared_ptr<ngraph::runtime::Allocator> allocator);
+                    make_call_frame(ngraph::pass::PassConfig& pass_config, Allocator* allocator);
                const LayoutDescriptorPtrs& get_parameter_layout_descriptors();
                const LayoutDescriptorPtrs& get_result_layout_descriptors();
                const std::vector<size_t>& get_memory_buffer_sizes() const