[MLIR] Mem manager (#9)

* Implements a simple memory manager that just does malloc for now. Pointers are freed during cleanup. * Enable JIT call-back to memory manager to allocate temps. * Memory manager pointer is passed to the JIT'ed code upon invocation. That makes the code re-entrant from different threads in case the code is shared among identical sub-graphs that are executed in parallel.

[MLIR] Mem manager (#9)
* Implements a simple memory manager that just does malloc for now. Pointers are freed during cleanup. * Enable JIT call-back to memory manager to allocate temps. * Memory manager pointer is passed to the JIT'ed code upon invocation. That makes the code re-entrant from different threads in case the code is shared among identical sub-graphs that are executed in parallel.
e941412e · Nagy Mostafa · nmostafa · ba735a80 · e941412e · e941412e
Commit e941412e authored May 09, 2019 by Nagy Mostafa Committed by nmostafa Jun 02, 2019
12 changed files
--- a/src/ngraph/runtime/cpu/CMakeLists.txt
+++ b/src/ngraph/runtime/cpu/CMakeLists.txt
@@ -22,6 +22,7 @@ set(MLIR_SRC
    mlir/dialect/ops.cpp
    mlir/compiler.cpp
    mlir/lowerer.cpp
+    mlir/memory_manager.cpp
 )
 set(SRC
    cpu_backend.cpp
@@ -222,10 +223,11 @@ if (NGRAPH_CPU_ENABLE)
    add_definitions(${LLVM_DEFINITIONS})
    target_include_directories(cpu_backend PRIVATE ${LLVM_INCLUDE_DIRS})
+    if(DEFINED ENV{MLIR_INCLUDE_PATH})
-    string(REPLACE ":" ";" MLIR_INCLUDE_PATH $ENV{MLIR_INCLUDE_PATH})
+        string(REPLACE ":" ";" MLIR_INCLUDE_PATH $ENV{MLIR_INCLUDE_PATH})
-    message(STATUS "MLIR Headers at : ${MLIR_INCLUDE_PATH}")
+        message(STATUS "MLIR Headers at : ${MLIR_INCLUDE_PATH}")
-    target_include_directories(cpu_backend PRIVATE ${MLIR_INCLUDE_PATH}) 
+        target_include_directories(cpu_backend PRIVATE ${MLIR_INCLUDE_PATH}) 
+    endif()
    llvm_map_components_to_libnames(llvm_libs support core irreader)

--- a/src/ngraph/runtime/cpu/builder/add.cpp
+++ b/src/ngraph/runtime/cpu/builder/add.cpp
@@ -21,6 +21,7 @@
 #include "ngraph/runtime/cpu/kernel/add.hpp"
 #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
 #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
+#include "ngraph/runtime/cpu/mlir/compiler.hpp"
 using namespace std;
 using namespace ngraph;

--- a/src/ngraph/runtime/cpu/mlir/compiler.cpp
+++ b/src/ngraph/runtime/cpu/mlir/compiler.cpp
@@ -59,12 +59,13 @@ namespace ngraph
        // Register any LLVM command line options
        llvm::cl::ParseEnvironmentOptions("ngraph", "MLIR_LLVM_OPTIONS", "");
    }
    void MLIRCompiler::compile_and_run()
    {
        build_module(); // MLIR gen
        lower_dialect();
        optimize();
-        bind_tensors_to_arguments();
+        bind_arguments();
        execute();
        cleanup();
    }
@@ -114,7 +115,6 @@ namespace ngraph
        }
    }
-    /// Collects input and output tensors to this sub-graph
    void MLIRCompiler::build_tensors_list()
    {
        for (const auto node : m_sub_graph)
@@ -223,7 +223,7 @@ namespace ngraph
    void MLIRCompiler::lower_dialect()
    {
        mlir::PassManager pm;
-        pm.addPass(createDialectLoweringPass());
+        pm.addPass(createDialectLoweringPass(this));
        pm.addPass(mlir::createCanonicalizerPass());
        pm.run(m_module.get());
@@ -314,7 +314,7 @@ namespace ngraph
        m_builder->create<NG_ReturnOp>(mlir::UnknownLoc::get(&m_context), value_list);
    }
-    void MLIRCompiler::bind_tensors_to_arguments()
+    void MLIRCompiler::bind_arguments()
    {
        NGRAPH_ASSERT(m_module && "MLIR module is not ready.");
@@ -326,11 +326,10 @@ namespace ngraph
        // SmallVector<StaticFloatMemref*>. StaticFloatMemref is just a struct with the
        // actual pointer to the data.
-        // TODO (dcab): Only f32 arguments are supported for now. We may want to implement
+        // create MemRef args
-        // this more generically by just allocating a void double pointer.
+        auto expected_arguments = allocate_memref_args(func);
-        auto expected_arguments = allocateMemRefArguments(func);
+        NGRAPH_ASSERT(expected_arguments.size()) << "Arguments can't be created";
-        NGRAPH_ASSERT(expected_arguments) << "Arguments can't be created";
+        m_invoke_args = std::move(expected_arguments);
-        m_invoke_args = std::move(*expected_arguments);
        NGRAPH_ASSERT(m_invoke_args.size() == m_external_tensors.size())
            << "Number of external tensors doesn't match number of function arguments";
@@ -340,6 +339,15 @@ namespace ngraph
        {
            ((mlir::StaticFloatMemRef*)m_invoke_args[i])->data = (float*)m_external_tensors[i];
        }
+        // Add pointer to memory manager
+        // malloc here since that's what allocateMemRefArguments use
+        // TODO (nmostafa): Better way of doing this ? Use builder allocator ?
+        MLIRMemMgr** mem_mgr_arg = reinterpret_cast<MLIRMemMgr**>(malloc(sizeof(void*)));
+        *mem_mgr_arg = &get_mem_mgr();
+        // inserting memory manager ptr in right location ?
+        NGRAPH_ASSERT(m_invoke_args.size() == get_mem_mgr_arg_id(func));
+        m_invoke_args.push_back(static_cast<void*>(mem_mgr_arg));
    }
    void MLIRCompiler::execute()
@@ -356,11 +364,6 @@ namespace ngraph
        llvm::InitializeNativeTarget();
        llvm::InitializeNativeTargetAsmPrinter();
-        // Create an MLIR execution engine. Note that it takes a null pass manager
-        // to make sure it won't run "default" passes on the MLIR that would trigger
-        // a second conversion to LLVM IR.  The execution engine eagerly JIT-compiles
-        // the module.
        // Create an MLIR execution engine. Note that it takes a null pass manager
        // to make sure it won't run "default" passes on the MLIR that would trigger
        // a second conversion to LLVM IR.  The execution engine eagerly JIT-compiles
@@ -389,5 +392,39 @@ namespace ngraph
        // Free MLIR function builder.
        if (m_builder)
            m_builder.reset(nullptr);
+        // Free allocated memory for JIT'ed code temps
+        m_mem_mgr.freeAll();
+    }
+    SmallVector<void*, 8> MLIRCompiler::allocate_memref_args(mlir::Function* func)
+    {
+        SmallVector<void*, 8> args;
+        args.reserve(func->getNumArguments());
+        for (const auto& arg : func->getArguments())
+        {
+            auto descriptor = allocate_memref_descriptor(arg->getType());
+            if (!descriptor)
+                continue;
+            args.push_back(descriptor);
+        }
+        return args;
+    }
+    mlir::StaticFloatMemRef* MLIRCompiler::allocate_memref_descriptor(mlir::Type type)
+    {
+        auto memRefType = type.dyn_cast<mlir::MemRefType>();
+        if (!memRefType)
+            return nullptr;
+        if (memRefType.getNumDynamicDims() != 0)
+            NGRAPH_FAIL();
+        // We only use StaticFloatMemRef because that's what MLIR currently offers.
+        // We should expand this with different types and dynamic MemRefs
+        auto* descriptor =
+            reinterpret_cast<mlir::StaticFloatMemRef*>(malloc(sizeof(mlir::StaticFloatMemRef)));
+        descriptor->data = nullptr;
+        return descriptor;
    }
 }
--- a/src/ngraph/runtime/cpu/mlir/compiler.hpp
+++ b/src/ngraph/runtime/cpu/mlir/compiler.hpp
@@ -15,6 +15,8 @@
 //*****************************************************************************
 #pragma once
+#include "lowerer.hpp"
+#include "memory_manager.hpp"
 #include "ngraph/descriptor/tensor.hpp"
 #include "ngraph/node.hpp"
@@ -29,6 +31,11 @@
 #include <mlir/IR/Types.h>
 #include <mlir/StandardOps/Ops.h>
+namespace mlir
+{
+    struct StaticFloatMemRef;
+}
 namespace ngraph
 {
    namespace runtime
@@ -37,6 +44,9 @@ namespace ngraph
        {
            class MLIRCompiler
            {
+            public:
+                static void init_mlir();
            public:
                using TensorList = std::vector<descriptor::Tensor*>;
                using TypeList = llvm::SmallVector<mlir::Type, 4>;
@@ -48,10 +58,17 @@ namespace ngraph
                {
                }
-                static void init_mlir();
+                /// Compiles and runs a subgraph in MLIR
-                /// Compiles and runs a subgraph in MLIR.
                void compile_and_run();
+                /// Returns the memory manager used by this sub-graph compiler
+                MLIRMemMgr& get_mem_mgr() { return m_mem_mgr; }
+                /// Returns memory manager pointer argument ID in call interface
+                unsigned get_mem_mgr_arg_id(mlir::Function* func)
+                {
+                    return func->getNumArguments() - 1;
+                }
            private:
                struct TensorInfo
                {
@@ -63,15 +80,17 @@ namespace ngraph
                void build_module();
                void lower_dialect();
                void optimize();
-                void bind_tensors_to_arguments();
+                void bind_arguments();
                void execute();
                void cleanup();
+                /// Collects input and output tensors to this sub-graph
                void build_tensors_list();
                mlir::Type get_mlir_type(const descriptor::Tensor* tensor);
                mlir::Type get_mlir_type(const element::Type& type);
                TensorInfo get_tensor_value(descriptor::Tensor* tensor);
                void update_tensor_value(descriptor::Tensor* tensor, mlir::Value* value);
                void build_ng_dialect();
                template <typename OP>
@@ -85,6 +104,12 @@ namespace ngraph
                mlir::Value* create_binary_op(const ngraph::Node* ng_node);
                void create_return();
+                /// Helper to create memref arguments for MLIR function signature
+                llvm::SmallVector<void*, 8> allocate_memref_args(mlir::Function* func);
+                /// Helper to allocate a mem ref object. Handles static shapes only for now.
+                mlir::StaticFloatMemRef* allocate_memref_descriptor(mlir::Type type);
            private:
                mlir::MLIRContext m_context;
                std::unique_ptr<mlir::Module> m_module;
@@ -107,6 +132,9 @@ namespace ngraph
                // List of input and output tensors in the graph
                TensorList m_ip_tensors, m_op_tensors;
                static const MLIRCompOpMap op_dispatcher;
+                // Memory manager for temp allocations inside JIT'ed code
+                MLIRMemMgr m_mem_mgr;
            };
        }
    }

--- a/src/ngraph/runtime/cpu/mlir/dialect/dialect.cpp
+++ b/src/ngraph/runtime/cpu/mlir/dialect/dialect.cpp
@@ -31,7 +31,7 @@ namespace ngraph
        addOperations<NG_AddOp>();
        addOperations<NG_MatmulBiasOp>();
        addOperations<NG_ReturnOp>();
-        addOperations<NG_FakeOutput>();
+        addOperations<NG_FakeInput>();
    }
    void NGDialect::printType(mlir::Type type, raw_ostream& os) const

--- a/src/ngraph/runtime/cpu/mlir/dialect/ops.cpp
+++ b/src/ngraph/runtime/cpu/mlir/dialect/ops.cpp
@@ -69,14 +69,14 @@ namespace ngraph
        }
    }
-    void runtime::cpu::NG_FakeOutput::build(mlir::Builder* builder,
+    void runtime::cpu::NG_FakeInput::build(mlir::Builder* builder,
-                                            mlir::OperationState* state,
+                                           mlir::OperationState* state,
-                                            mlir::Type resultType)
+                                           mlir::Type resultType)
    {
        state->types.push_back(std::move(resultType));
    }
-    mlir::LogicalResult runtime::cpu::NG_FakeOutput::verify()
+    mlir::LogicalResult runtime::cpu::NG_FakeInput::verify()
    {
        // TODO: Verify returned tensor types must match function return type.
        return mlir::success();

--- a/src/ngraph/runtime/cpu/mlir/dialect/ops.hpp
+++ b/src/ngraph/runtime/cpu/mlir/dialect/ops.hpp
@@ -29,10 +29,15 @@ namespace ngraph
        namespace cpu
        {
            // Fake instructions
-            class NG_FakeOutput : public mlir::Op<NG_FakeOutput,
-                                                  mlir::OpTrait::NOperands<0>::Impl,
+            /// Fake Input
-                                                  mlir::OpTrait::OneResult,
+            /// Used as fake definitions during dialect conversion.
-                                                  mlir::OpTrait::HasNoSideEffect>
+            /// Used when we cannot insert the real definition once during lowering.
+            /// The are cleaned up after dialect lowering and replaced with real defintion.
+            class NG_FakeInput : public mlir::Op<NG_FakeInput,
+                                                 mlir::OpTrait::NOperands<0>::Impl,
+                                                 mlir::OpTrait::OneResult,
+                                                 mlir::OpTrait::HasNoSideEffect>
            {
            public:
                static llvm::StringRef getOperationName() { return "ng.fake.output"; }

--- a/src/ngraph/runtime/cpu/mlir/dialect/type.hpp
+++ b/src/ngraph/runtime/cpu/mlir/dialect/type.hpp
@@ -94,6 +94,20 @@ namespace ngraph
                EltType getElementType() const { return getImpl()->getElementType(); }
                Shape getShape() const { return getImpl()->getShape(); }
                int getRank() { return getShape().size(); }
+                size_t getSizeInBytes()
+                {
+                    size_t s = 1;
+                    auto shape = getShape();
+                    for (auto i = 0; i < getRank(); i++)
+                    {
+                        // no dynamic dims
+                        if (shape[i] == -1)
+                            return -1;
+                        s *= shape[i];
+                    }
+                    // Multiply times element size
+                    return s * llvm::divideCeil(getElementType().getIntOrFloatBitWidth(), 8);
+                }
                /// convert to memref native MLIR type. Used for lowering.
                mlir::MemRefType toMemref();
                /// create a unique tensor type based on element type and shape.

--- a/src/ngraph/runtime/cpu/mlir/lowerer.cpp
+++ b/src/ngraph/runtime/cpu/mlir/lowerer.cpp
--- a/src/ngraph/runtime/cpu/mlir/lowerer.hpp
+++ b/src/ngraph/runtime/cpu/mlir/lowerer.hpp
@@ -25,7 +25,9 @@ namespace ngraph
    {
        namespace cpu
        {
-            mlir::Pass* createDialectLoweringPass();
+            class MLIRCompiler;
+            mlir::Pass* createDialectLoweringPass(MLIRCompiler* compiler);
        }
    }
 }
--- a/src/ngraph/runtime/cpu/mlir/memory_manager.cpp
+++ b/src/ngraph/runtime/cpu/mlir/memory_manager.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "memory_manager.hpp"
+#include <llvm/ADT/STLExtras.h>
+#include <memory>
+#include "compiler.hpp"
+#include "ngraph/runtime/cpu/cpu_backend_visibility.h"
+using namespace ngraph::runtime::cpu;
+/// Call back to allocate memory for temps from JIT'ed code
+extern "C" CPU_BACKEND_API void* __mlir_allocate(MLIRMemMgr* mem_mgr, size_t size)
+{
+    return mem_mgr->allocate(size);
+}
+void* MLIRMemMgr::allocate(size_t size)
+{
+    void* ptr = malloc(size);
+    ptrList.push_back(ptr);
+    return ptr;
+}
+void MLIRMemMgr::freeAll()
+{
+    for (auto p : ptrList)
+    {
+        free(p);
+    }
+}
--- a/src/ngraph/runtime/cpu/mlir/memory_manager.hpp
+++ b/src/ngraph/runtime/cpu/mlir/memory_manager.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+#include <stdint.h>
+#include <vector>
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            /// Memory manager for temporaries in MLIR compiled sub-graph
+            /// It handles call-backs from the code and returns pointer to allocated memory
+            /// Also, handles freeing up memory
+            class MLIRMemMgr
+            {
+            public:
+                /// Allocates data for temporary tensor. Currently, it is called for each
+                /// temp tensor defintion. Keeps track of each pointer and free them during cleanup.
+                // TODO: Use pre-allocation from framework memory manager
+                void* allocate(size_t size);
+                /// Frees all allocated pointers
+                void freeAll();
+            private:
+                std::vector<void*> ptrList;
+            };
+        }
+    }
+}