Unverified Commit f946f097 authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Merge branch 'master' into leona/doc_igpu

parents 63d0fd90 005ba206
......@@ -458,6 +458,8 @@ set (SRC
rank.hpp
runtime/aligned_buffer.cpp
runtime/aligned_buffer.hpp
runtime/allocator.cpp
runtime/allocator.hpp
runtime/backend.cpp
runtime/backend.hpp
runtime/backend_manager.cpp
......
......@@ -46,7 +46,7 @@ static std::unique_ptr<DistributedInterface> s_distributed_interface;
/// \brief Install the process-wide distributed interface.
/// \param distributed_interface ownership is transferred into the module-level
///        s_distributed_interface singleton.
void ngraph::set_distributed_interface(std::unique_ptr<DistributedInterface> distributed_interface)
{
    // Log before moving: get_name() must be called while the pointer is still valid.
    NGRAPH_DEBUG << "Setting distributed interface to: " << distributed_interface->get_name();
    s_distributed_interface = std::move(distributed_interface);
}
......
......@@ -18,22 +18,34 @@
#include <memory>
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/allocator.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace std;
// Default constructor: owns no storage. All members are zeroed so the
// destructor's null checks make destruction a no-op.
runtime::AlignedBuffer::AlignedBuffer()
    : m_allocator(nullptr)
    , m_allocated_buffer(nullptr)
    , m_aligned_buffer(nullptr)
    , m_byte_size(0)
{
}
runtime::AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment)
runtime::AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment, Allocator* allocator)
: m_allocator(allocator)
, m_byte_size(byte_size)
{
m_byte_size = std::max<size_t>(1, byte_size);
size_t allocation_size = m_byte_size + alignment;
m_allocated_buffer = static_cast<char*>(ngraph_malloc(allocation_size));
if (allocator)
{
m_allocated_buffer = static_cast<char*>(m_allocator->malloc(allocation_size, alignment));
}
else
{
m_allocated_buffer = static_cast<char*>(malloc(allocation_size));
}
m_aligned_buffer = m_allocated_buffer;
size_t mod = size_t(m_aligned_buffer) % alignment;
......@@ -44,10 +56,12 @@ runtime::AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment)
}
runtime::AlignedBuffer::AlignedBuffer(AlignedBuffer&& other)
: m_allocated_buffer(other.m_allocated_buffer)
: m_allocator(other.m_allocator)
, m_allocated_buffer(other.m_allocated_buffer)
, m_aligned_buffer(other.m_aligned_buffer)
, m_byte_size(other.m_byte_size)
{
other.m_allocator = nullptr;
other.m_allocated_buffer = nullptr;
other.m_aligned_buffer = nullptr;
other.m_byte_size = 0;
......@@ -57,7 +71,14 @@ runtime::AlignedBuffer::~AlignedBuffer()
{
if (m_allocated_buffer != nullptr)
{
ngraph_free(m_allocated_buffer);
if (m_allocator)
{
m_allocator->free(m_allocated_buffer);
}
else
{
free(m_allocated_buffer);
}
}
}
......@@ -65,9 +86,11 @@ runtime::AlignedBuffer& runtime::AlignedBuffer::operator=(AlignedBuffer&& other)
{
if (this != &other)
{
m_allocator = other.m_allocator;
m_allocated_buffer = other.m_allocated_buffer;
m_aligned_buffer = other.m_aligned_buffer;
m_byte_size = other.m_byte_size;
other.m_allocator = nullptr;
other.m_allocated_buffer = nullptr;
other.m_aligned_buffer = nullptr;
other.m_byte_size = 0;
......
......@@ -18,6 +18,8 @@
#include <cstddef>
#include "ngraph/runtime/allocator.hpp"
namespace ngraph
{
namespace runtime
......@@ -32,7 +34,11 @@ namespace ngraph
class ngraph::runtime::AlignedBuffer
{
public:
AlignedBuffer(size_t byte_size, size_t alignment);
// Allocator objects and the allocation interfaces are owned by the
// creators of AlignedBuffers. They need to ensure that the lifetime of
// allocator exceeds the lifetime of this AlignedBuffer.
AlignedBuffer(size_t byte_size, size_t alignment, Allocator* allocator = nullptr);
AlignedBuffer();
~AlignedBuffer();
......@@ -46,6 +52,7 @@ private:
AlignedBuffer(const AlignedBuffer&) = delete;
AlignedBuffer& operator=(const AlignedBuffer&) = delete;
Allocator* m_allocator;
char* m_allocated_buffer;
char* m_aligned_buffer;
size_t m_byte_size;
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/allocator.hpp"
// Out-of-line definition for the pure virtual destructor declared in
// allocator.hpp. Even a pure virtual destructor needs a definition, because
// derived-class destructors invoke it implicitly during destruction.
ngraph::runtime::Allocator::~Allocator()
{
}
class ngraph::runtime::DefaultAllocator : public ngraph::runtime::Allocator
{
public:
void* malloc(size_t size, size_t alignment)
{
// If allocation succeeds, returns a pointer to the lowest (first) byte in the
// allocated memory block that is suitably aligned for any scalar type.
// TODO(pruthvi): replace std::malloc with custom aligned_alloc implementation
// which is portable and work on all alignment requirement.
void* ptr = std::malloc(size);
// check for exception
if (!ptr)
{
throw ngraph::ngraph_error("malloc failed to allocate memory of size " +
std::to_string(size));
}
return ptr;
}
void free(void* ptr)
{
if (ptr)
{
std::free(ptr);
}
}
};
std::unique_ptr<ngraph::runtime::Allocator> ngraph::runtime::create_default_allocator()
{
return std::unique_ptr<DefaultAllocator>(new DefaultAllocator());
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>

#include "ngraph/except.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
    namespace runtime
    {
        // Abstract allocation interface; defined at the bottom of this header.
        class Allocator;
        // Concrete std::malloc/std::free-backed implementation; defined in
        // allocator.cpp and only reachable through create_default_allocator().
        class DefaultAllocator;
        /// \brief Create a default allocator that calls into system
        /// allocation libraries
        std::unique_ptr<Allocator> create_default_allocator();
    }
}
/// \brief Abstract class for the allocator
///
/// Implementations must ensure that a pointer returned by malloc() is
/// released by the matching free() of the same allocator instance.
class ngraph::runtime::Allocator
{
public:
    // Pure virtual destructor; an out-of-line definition is provided in
    // allocator.cpp so derived destructors can chain to it.
    virtual ~Allocator() = 0;
    /// \brief allocates memory with the given size and alignment requirement
    /// \param size exact size of bytes to allocate
    /// \param alignment specifies the alignment. Must be a valid alignment supported by the implementation.
    virtual void* malloc(size_t size, size_t alignment) = 0;
    /// \brief deallocates the memory pointed by ptr
    /// \param ptr pointer to the aligned memory to be released
    virtual void free(void* ptr) = 0;
};
......@@ -86,6 +86,13 @@ void runtime::Backend::remove_compiled_function(std::shared_ptr<Executable> exec
{
}
/// \brief Default classification of a pointer as device memory.
/// \return false — the base backend assumes all memory is host memory.
bool runtime::Backend::is_device_memory(void* /* ptr */)
{
    // Parameter name commented out to avoid an unused-parameter warning.
    // Override this method for each supported backend to determine if the
    // passed pointer is in device pinned memory or not.
    return false;
}
std::shared_ptr<runtime::Executable> runtime::Backend::load(istream& input_stream)
{
throw runtime_error("load opertion unimplemented.");
......
......@@ -20,10 +20,12 @@
#include "ngraph/function.hpp"
#include "ngraph/pass/pass_config.hpp"
#include "ngraph/runtime/allocator.hpp"
#include "ngraph/runtime/executable.hpp"
#include "ngraph/runtime/performance_counter.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
......@@ -140,6 +142,23 @@ public:
/// \returns a shared pointer to the op if found, else nullptr
virtual std::shared_ptr<ngraph::Node> get_backend_op(const std::string& op_name, ...);
/// \brief Returns memory allocator used by backend for host allocations
/// \return non-owning pointer; the base implementation has no allocator and
///         returns nullptr.
virtual Allocator* get_host_memory_allocator() { return nullptr; }
/// \brief Set the host memory allocator to be used by the backend
/// \param allocator unique_ptr transferring ownership of the allocator object.
///        NOTE: this default implementation silently destroys the passed
///        allocator; backends that support custom host allocation override it.
virtual void set_host_memory_allocator(std::unique_ptr<Allocator> allocator) {}
/// \brief Returns memory allocator used by backend for device allocations
/// \return non-owning pointer, or nullptr when the backend has none.
virtual Allocator* get_device_memory_allocator()
{
    // override this method from each supported backend to return
    // its own device memory allocator
    return nullptr;
}
/// \brief method for each supported backend to determine if the passed pointer is in device pinned memory or not
/// \param ptr pointer to the memory to determine if its in device memory or not
virtual bool is_device_memory(void* ptr);
/// \brief Allows sending backend specific configuration. The map contains key, value pairs
/// specific to a particluar backend. The definition of these key, value pairs is
/// defined by each backend.
......
......@@ -63,11 +63,17 @@ namespace
} s_cpu_static_init;
}
runtime::cpu::CPU_Backend::~CPU_Backend()
{
    // Explicitly drop all compiled executables before the remaining members
    // are destroyed. NOTE(review): presumably this guarantees executables are
    // gone before the backend's owned allocator (m_allocator) is destroyed,
    // since default member destruction order would not — confirm against the
    // member declaration order in cpu_backend.hpp.
    m_exec_map.clear();
}
/// \brief Build a call frame for a compiled external function.
/// \param external_function the compiled function to create a frame for
/// \param pass_config pass configuration forwarded to frame construction
/// \param allocator non-owning allocator pointer forwarded so intermediate
///        buffers can use backend-provided allocation (may be nullptr).
shared_ptr<runtime::cpu::CPU_CallFrame> runtime::cpu::CPU_Backend::make_call_frame(
    const shared_ptr<runtime::cpu::CPU_ExternalFunction>& external_function,
    ngraph::pass::PassConfig& pass_config,
    Allocator* allocator)
{
    return external_function->make_call_frame(pass_config, allocator);
}
shared_ptr<runtime::Tensor>
......@@ -114,7 +120,8 @@ shared_ptr<runtime::Executable>
return rc;
}
}
rc = make_shared<CPU_Executable>(func, pass_config, performance_counters_enabled);
rc = make_shared<CPU_Executable>(
func, pass_config, get_host_memory_allocator(), performance_counters_enabled);
{
std::lock_guard<std::mutex> guard(m_exec_map_mutex);
m_exec_map.insert({func, rc});
......@@ -124,6 +131,7 @@ shared_ptr<runtime::Executable>
runtime::cpu::CPU_Executable::CPU_Executable(shared_ptr<Function> func,
ngraph::pass::PassConfig& pass_config,
Allocator* allocator,
bool performance_counters_enabled)
{
FunctionInstance& instance = m_function_instance;
......@@ -131,7 +139,7 @@ runtime::cpu::CPU_Executable::CPU_Executable(shared_ptr<Function> func,
{
instance.m_external_function = make_shared<CPU_ExternalFunction>(func);
instance.m_external_function->m_emit_timing = performance_counters_enabled;
auto cf = instance.m_external_function->make_call_frame(pass_config);
auto cf = instance.m_external_function->make_call_frame(pass_config, allocator);
instance.m_call_frame = dynamic_pointer_cast<CPU_CallFrame>(cf);
}
set_parameters_and_results(*func);
......@@ -173,6 +181,28 @@ void runtime::cpu::CPU_Backend::remove_compiled_function(shared_ptr<Executable>
}
}
/// \brief Return the backend's host allocator, creating the system default
///        one on first use. The backend retains ownership; callers receive
///        a non-owning pointer.
runtime::Allocator* runtime::cpu::CPU_Backend::get_host_memory_allocator()
{
    // Lazy initialization: install the default allocator the first time
    // anyone asks for it.
    if (m_allocator == nullptr)
    {
        m_allocator = create_default_allocator();
    }
    return m_allocator.get();
}
/// \brief Install a host memory allocator, taking ownership of it.
/// \throws ngraph_error if an allocator is already in place — memory handed
///         out by the existing allocator may still be live and must be freed
///         through the same implementation.
void runtime::cpu::CPU_Backend::set_host_memory_allocator(
    std::unique_ptr<runtime::Allocator> allocator)
{
    if (!m_allocator)
    {
        m_allocator = std::move(allocator);
        return;
    }
    // Resources allocated with the existing allocator might still be around and expect it
    // to be available for freeing. We cannot switch to the new allocator
    throw ngraph_error(
        "Allocator already exists. Changing allocators mid-execution is not permitted.");
}
vector<runtime::PerformanceCounter> runtime::cpu::CPU_Executable::get_performance_data() const
{
vector<runtime::PerformanceCounter> rc;
......@@ -190,6 +220,7 @@ bool runtime::cpu::CPU_Backend::is_supported(const Node& op) const
{
return true;
}
bool runtime::cpu::CPU_Backend::is_supported_property(const Property prop) const
{
if (prop == Property::memory_attach)
......
......@@ -22,6 +22,7 @@
#include "cpu_backend_visibility.h"
#include "ngraph/pass/pass_config.hpp"
#include "ngraph/runtime/allocator.hpp"
#include "ngraph/runtime/backend.hpp"
namespace ngraph
......@@ -36,9 +37,12 @@ namespace ngraph
class CPU_BACKEND_API CPU_Backend : public runtime::Backend
{
public:
~CPU_Backend() override;
std::shared_ptr<CPU_CallFrame>
make_call_frame(const std::shared_ptr<CPU_ExternalFunction>& external_function,
ngraph::pass::PassConfig& pass_config);
ngraph::pass::PassConfig& pass_config,
Allocator* allocator);
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
......@@ -60,6 +64,9 @@ namespace ngraph
void remove_compiled_function(std::shared_ptr<Executable> exec) override;
Allocator* get_host_memory_allocator() override;
void set_host_memory_allocator(std::unique_ptr<Allocator> allocator) override;
bool is_supported(const Node& node) const override;
bool is_supported_property(const Property prop) const override;
......@@ -69,6 +76,7 @@ namespace ngraph
std::mutex m_exec_map_mutex;
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Executable>>
m_exec_map;
std::unique_ptr<Allocator> m_allocator;
};
class CPU_BACKEND_API CPU_Executable : public runtime::Executable
......@@ -76,6 +84,7 @@ namespace ngraph
public:
CPU_Executable(std::shared_ptr<Function> func,
ngraph::pass::PassConfig& pass_config,
Allocator* allocator,
bool performance_counters_enabled);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
......
......@@ -30,7 +30,8 @@ using namespace ngraph;
runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
InitContextFuncCG compiled_init_ctx_func,
DestroyContextFuncCG compiled_destroy_ctx_func,
EntryPoint compiled_function)
EntryPoint compiled_function,
runtime::Allocator* allocator)
: m_external_function(external_function)
, m_compiled_init_ctx_func(compiled_init_ctx_func)
, m_compiled_destroy_ctx_func(compiled_destroy_ctx_func)
......@@ -47,7 +48,7 @@ runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction>
std::to_string(std::thread::hardware_concurrency()) + "]");
}
setup_runtime_context();
setup_runtime_context(allocator);
if (!m_external_function->is_direct_execution())
{
// Invoke codegen runtime context initialization function.
......@@ -178,7 +179,7 @@ void runtime::cpu::CPU_CallFrame::propagate_layouts(
}
}
void runtime::cpu::CPU_CallFrame::setup_runtime_context()
void runtime::cpu::CPU_CallFrame::setup_runtime_context(Allocator* allocator)
{
for (auto i = 0; i < m_num_ctx; i++)
{
......@@ -202,7 +203,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
size_t alignment = runtime::cpu::CPU_ExternalFunction::s_memory_pool_alignment;
for (auto buffer_size : m_external_function->get_memory_buffer_sizes())
{
auto buffer = new AlignedBuffer(buffer_size, alignment);
auto buffer = new AlignedBuffer(buffer_size, alignment, allocator);
ctx->memory_buffers.push_back(buffer);
}
const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
......
......@@ -24,6 +24,7 @@
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/allocator.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/tensor.hpp"
......@@ -59,7 +60,8 @@ namespace ngraph
CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
InitContextFuncCG compiled_init_ctx_func,
DestroyContextFuncCG compiled_destroy_ctx_func,
EntryPoint compiled_function);
EntryPoint compiled_function,
runtime::Allocator* allocator);
~CPU_CallFrame();
/// \brief Invoke the function with values matching the signature of the function.
......@@ -71,7 +73,7 @@ namespace ngraph
void propagate_layouts(const std::vector<std::shared_ptr<runtime::Tensor>>& tvs,
const LayoutDescriptorPtrs& layouts) const;
void setup_runtime_context();
void setup_runtime_context(runtime::Allocator* allocator);
void setup_cg_runtime_context();
void cleanup_runtime_context();
......
......@@ -1825,7 +1825,8 @@ bool runtime::cpu::CPU_ExternalFunction::is_codegen(const ngraph::pass::PassConf
}
shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
runtime::cpu::CPU_ExternalFunction::make_call_frame(ngraph::pass::PassConfig& pass_config)
runtime::cpu::CPU_ExternalFunction::make_call_frame(ngraph::pass::PassConfig& pass_config,
Allocator* allocator)
{
#if defined(NGRAPH_DEX_ONLY)
if (is_codegen(pass_config))
......@@ -1853,7 +1854,8 @@ shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
return make_shared<ngraph::runtime::cpu::CPU_CallFrame>(shared_from_this(),
m_compiled_init_ctx_func,
m_compiled_destroy_ctx_func,
m_compiled_function);
m_compiled_function,
allocator);
}
const runtime::cpu::LayoutDescriptorPtrs&
......
......@@ -100,8 +100,7 @@ namespace ngraph
bool release_function = true);
~CPU_ExternalFunction();
std::shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
make_call_frame(ngraph::pass::PassConfig& pass_config);
make_call_frame(ngraph::pass::PassConfig& pass_config, Allocator* allocator);
const LayoutDescriptorPtrs& get_parameter_layout_descriptors();
const LayoutDescriptorPtrs& get_result_layout_descriptors();
const std::vector<size_t>& get_memory_buffer_sizes() const
......
......@@ -37,7 +37,6 @@ namespace ngraph
{
class AlignedBuffer;
}
class State;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment