Commit f143bb13, authored by Nagy Mostafa, committed by Scott Cyphers

[MLIR] MLIR Compiler refactoring (#3786)

* Re-organize files. Create MLIR backend classes

* WIP

* Refactored. Code compiles

* Moved context to Runtime class to outlive compilation and execution

* style-apply

* Base Runtime class. Few other modifications

* Minor fixes

* Fixed Runtime::run() to take type-erased pointer

* renamed core compiler

* rename backend compiler

* rename runtime compiler

* PR feedback

* Fix build fails
parent bb9b5be6
...@@ -14,13 +14,97 @@ ...@@ -14,13 +14,97 @@
# limitations under the License. # limitations under the License.
# ****************************************************************************** # ******************************************************************************
# Common MLIR/LLVM setup for the sub-directories below
include_directories( include_directories(
${NGRAPH_INCLUDE_PATH} ${NGRAPH_INCLUDE_PATH}
${MLIR_LLVM_INCLUDE_PATH} ${MLIR_LLVM_INCLUDE_PATH}
${MLIR_INCLUDE_PATHS} ${MLIR_INCLUDE_PATHS}
) )
add_subdirectory(compiler)
add_subdirectory(tools/ngraph-opt) add_subdirectory(tools/ngraph-opt)
# Sources of the mlir_backend library. Paths are relative to this directory.
# The list order is kept as-is on purpose.
set(SRC
backend/cpu/cpu_backend.cpp
backend/pass/affine_lowerer.cpp
backend/pass/memory_optimization.cpp
core/compiler.cpp
core/ngraph_dialect/dialect.cpp
core/ngraph_dialect/type.cpp
core/ngraph_dialect/ops.cpp
core/pass/mlir_subgraph_extraction.cpp
# Header listed next to its implementation file.
core/pass/mlir_subgraph_extraction.hpp
runtime/cpu/memory_manager.cpp
runtime/cpu/cpu_runtime.cpp
utils.cpp
)
# All of the above is built into a single shared library.
add_library(mlir_backend SHARED ${SRC})
# Translate the LLVM component names into library names; the result is stored
# in llvm_libs.
llvm_map_components_to_libnames(llvm_libs support core irreader)
# Link MLIR libs
# PRIVATE: these libraries are link dependencies of mlir_backend only and are
# not propagated to targets that link against it.
target_link_libraries(
mlir_backend PRIVATE
MLIRAnalysis
MLIREDSC
MLIRExecutionEngine
MLIRIR
MLIRLLVMIR
MLIRStandardToLLVM
MLIRParser
MLIRPass
MLIRTargetLLVMIR
MLIRTransforms
MLIRSupport
)
# Some libs need whole-archive linkage because of Globals static initialization.
#
# whole_archive_link(<target> <lib>...)
#   target : target whose LINK_FLAGS property is set (any previous value of
#            that property is replaced).
#   <lib>  : paths of the static archives that must be linked in completely.
function(whole_archive_link target)
    if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
        # -all_load takes no operand: the archives follow as ordinary linker
        # inputs, so each one has to be separated from the previous one by a
        # space (without it two archives fuse into one bogus path).
        set(link_flags "-Llib -Wl,-all_load")
        foreach(LIB ${ARGN})
            string(CONCAT link_flags "${link_flags}" " ${LIB}")
        endforeach()
    else()
        # GNU-style linkers: the archives are passed inside one comma-separated
        # -Wl, option, bracketed by --whole-archive / --no-whole-archive.
        set(link_flags "-Llib -Wl,--whole-archive,")
        foreach(LIB ${ARGN})
            string(CONCAT link_flags "${link_flags}" "${LIB},")
        endforeach()
        string(CONCAT link_flags "${link_flags}" "--no-whole-archive")
    endif()
    message(STATUS "MLIR Ops link flag: ${link_flags}")
    # Quoted so the flag string is always passed as exactly one property value.
    set_target_properties(${target} PROPERTIES LINK_FLAGS "${link_flags}")
endfunction()
# Link the MLIR Affine and Standard ops archives with whole-archive semantics.
# The archives are referenced by full path inside the LLVM build library dir.
whole_archive_link(mlir_backend
${LLVM_BUILD_LIBRARY_DIR}/libMLIRAffineOps.a
${LLVM_BUILD_LIBRARY_DIR}/libMLIRStandardOps.a
)
# Link LLVM libs
# llvm_libs is expected to have been filled by llvm_map_components_to_libnames.
target_link_libraries(
mlir_backend PRIVATE
${llvm_libs}
)
# Link ngraph
# PUBLIC: ngraph is also propagated to targets that link against mlir_backend.
target_link_libraries(mlir_backend PUBLIC ngraph)
# table-gen dialect ops
# include table-gen helpers
include(${LLVM_DIR}/TableGen.cmake)
# ngraph_tablegen(<ofn> <tablegen-args>...)
#   Forwards the output file name and all extra arguments to the `tablegen`
#   helper for the MLIR project, adding the MLIR source and binary include
#   directories, then appends the generated file (located in the current
#   binary dir) to TABLEGEN_OUTPUT in the caller's scope.
function(ngraph_tablegen ofn)
    tablegen(MLIR ${ofn} ${ARGN} "-I${MLIR_SRC_INCLUDE_PATH}" "-I${MLIR_BIN_INCLUDE_PATH}")
    list(APPEND TABLEGEN_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
    set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} PARENT_SCOPE)
endfunction()
# Name of the table-gen executable to run.
# NOTE(review): presumably read by the tablegen() helper - confirm.
set(MLIR_TABLEGEN_EXE mlir-tblgen)
# The .td file describing the nGraph dialect ops; input for the calls below.
set(LLVM_TARGET_DEFINITIONS core/ngraph_dialect/ops.td)
# Generate op declarations and op definitions from the .td file.
ngraph_tablegen(ops.h.inc -gen-op-decls)
ngraph_tablegen(ops.cpp.inc -gen-op-defs)
# Wrap the generated files in a target and make mlir_backend depend on it so
# the .inc files are ordered before the library build.
add_public_tablegen_target(ngraph_ops_gen)
add_dependencies(mlir_backend ngraph_ops_gen)
# ngraph_tablegen records its outputs under CMAKE_CURRENT_BINARY_DIR, so that
# directory is added to the include path of mlir_backend.
target_include_directories(mlir_backend PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
install(TARGETS mlir_backend DESTINATION ${NGRAPH_INSTALL_LIB})
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#pragma once
#include <memory>
#include <mlir/IR/Module.h>
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
/// Abstract base class for MLIR backends. It holds the MLIR module to
/// generate code for and a reference to the MLIR context; concrete
/// backends implement codegen().
class MLIRBackend
{
public:
    /// Takes over the module held by `module`: the argument is moved from
    /// and must not be relied upon by the caller afterwards.
    /// `context` is stored by reference and must outlive this object.
    MLIRBackend(mlir::OwningModuleRef& module, mlir::MLIRContext& context)
        : m_module(std::move(module))
        , m_context(context)
    {
    }

    /// Initializes the held module reference from `moduleOp`.
    /// `context` is stored by reference and must outlive this object.
    MLIRBackend(mlir::ModuleOp& moduleOp, mlir::MLIRContext& context)
        : m_module(moduleOp)
        , m_context(context)
    {
    }

    /// Virtual destructor: this is a polymorphic base class, so destroying
    /// a derived backend through an MLIRBackend pointer must be well
    /// defined.
    virtual ~MLIRBackend() = default;

    /// Declaring the destructor suppresses the implicit move constructor;
    /// default it explicitly so backends stay move-constructible.
    MLIRBackend(MLIRBackend&&) = default;

    /// Generate code for the module
    virtual void codegen() = 0;

    /// Returns the module held by this backend.
    mlir::OwningModuleRef& get_module() { return m_module; }

protected:
    // Module this backend operates on.
    mlir::OwningModuleRef m_module;
    // MLIR context supplied at construction; not owned.
    mlir::MLIRContext& m_context;
};
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#include "cpu_backend.hpp"
#include "contrib/mlir/backend/pass/affine_lowerer.hpp"
#include "contrib/mlir/backend/pass/memory_optimization.hpp"
#include "contrib/mlir/utils.hpp"
#include "ngraph/check.hpp"
#include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Target/TargetMachine.h>
#include <mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h>
#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h>
#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h>
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Target/LLVMIR.h>
#include <mlir/Transforms/DialectConversion.h>
#include <mlir/Transforms/Passes.h>
#define DEBUG_TYPE "mlir-cpu-backend"
// *** Optimization flags ***
// All flags below are off (or neutral) by default.

// Adds the nGraph dialect in-place memory optimization pass.
static llvm::cl::opt<bool> clEnableNgInPlaceMemoryOpt(
    "ng-inplace-mem-opt",
    llvm::cl::init(false),
    llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass"));

// Adds the loop fusion pass to the Affine dialect pipeline.
static llvm::cl::opt<bool>
    clEnableAffineLoopFusion("ngraph-affine-loop-fusion",
                             llvm::cl::init(false),
                             llvm::cl::desc("Enable loop fusion optimization in Affine dialect"));

// Adds the loop tiling pass to the Affine dialect pipeline.
static llvm::cl::opt<bool>
    clEnableAffineLoopTiling("ngraph-affine-loop-tile",
                             llvm::cl::init(false),
                             llvm::cl::desc("Enable loop tiling optimization in Affine dialect"));

// Cache level whose size drives loop tiling; defaults to 2.
static llvm::cl::opt<unsigned>
    clLoopTilingCacheLevel("ngraph-affine-loop-tile-cache-level",
                           llvm::cl::init(2),
                           llvm::cl::desc("Cache level to which to apply affine loop tiling."));

// Explicit cache size for loop tiling; 0 means "query the target instead".
// The help text names the cache-level option exactly as it is registered above.
static llvm::cl::opt<unsigned> clLoopTilingCacheSize(
    "ngraph-affine-loop-tile-cache-size",
    llvm::cl::init(0),
    llvm::cl::desc(
        "Cache size to use in affine loop tiling. If not zero, it overrides the cache-size "
        "inferred from the host CPU for the cache level specified by "
        "-ngraph-affine-loop-tile-cache-level."));
using namespace ngraph::runtime::ngmlir;
// Default optimization level.
// MLIRCPUBackend::init() may replace it with the value of the
// NGRAPH_MLIR_OPT_LEVEL environment variable.
llvm::CodeGenOpt::Level MLIRCPUBackend::mlirOptLevel = llvm::CodeGenOpt::Level::Aggressive;
// Empty until MLIRCPUBackend::init() creates the host target machine.
std::unique_ptr<llvm::TargetMachine> MLIRCPUBackend::targetMachine;
// Set to true by MLIRCPUBackend::init() once initialization has completed.
bool MLIRCPUBackend::initialized = false;
/// Builds a target machine for the host the process is running on.
/// `optLevel` is interpreted as an llvm::CodeGenOpt::Level value:
///     None (-O0), Less (-O1), Default (-O2, -Os), Aggressive (-O3).
/// Returns the target machine, or the error reported by host detection or by
/// target machine creation.
static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
    createDefaultTargetMachine(unsigned optLevel)
{
    auto hostBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
    if (hostBuilder)
    {
        // Only the codegen optimization level is customized; relocation model and
        // code model are left at their default values.
        const auto codeGenLevel = static_cast<llvm::CodeGenOpt::Level>(optLevel);
        hostBuilder->setCodeGenOptLevel(codeGenLevel);
        return hostBuilder->createTargetMachine();
    }
    // Host detection failed: hand the error to the caller.
    return hostBuilder.takeError();
}
/// Returns the cache level size from `targetInfo` for the `cacheLevel` provided. If `userCacheSize`
/// is not zero, it returns `userCacheSize`.
///
/// Only cache levels 1 and 2 are supported; any other level is reported through
/// NGRAPH_UNREACHABLE. A check failure is raised when the target information does not provide
/// a size for the requested level.
static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
                                  unsigned cacheLevel,
                                  unsigned userCacheSize)
{
    // A user-provided size always wins over the value inferred from the target.
    if (userCacheSize)
    {
        return userCacheSize;
    }

    llvm::Optional<unsigned> optCacheLevelSize;
    switch (cacheLevel)
    {
    case 1:
        optCacheLevelSize = targetInfo.getCacheSize(llvm::TargetTransformInfo::CacheLevel::L1D);
        break;
    case 2:
        optCacheLevelSize = targetInfo.getCacheSize(llvm::TargetTransformInfo::CacheLevel::L2D);
        break;
    default:
        NGRAPH_UNREACHABLE("Unsupported cache level: ", cacheLevel, ". Only 1 and 2 are supported");
    }

    // The explanation is passed as the message argument (not &&-ed into the condition) so that
    // it is reported as the failure message.
    NGRAPH_CHECK(optCacheLevelSize.hasValue(), "Cache level size is not available in TTI");
    return optCacheLevelSize.getValue();
}
void MLIRCPUBackend::init()
{
// Mutex to safely initialize CPU backend
static std::mutex mlirInitMutex;
std::unique_lock<std::mutex> lock(mlirInitMutex);
if (!initialized)
{
// Override default optimization level with macro value.
if (char* optLevelStr = std::getenv("NGRAPH_MLIR_OPT_LEVEL"))
{
unsigned clOptLevel = std::stoi(optLevelStr);
NGRAPH_CHECK(clOptLevel >= 0 && clOptLevel <= 3, "Invalid optimization level");
mlirOptLevel = (llvm::CodeGenOpt::Level)clOptLevel;
}
// Initialize LLVM targets and target machine for current host.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
auto expectedTargetMachine = createDefaultTargetMachine(mlirOptLevel);
NGRAPH_CHECK(expectedTargetMachine, "Invalid target machine");
targetMachine = std::move(*expectedTargetMachine);
initialized = true;
}
}
// Entry point of the CPU backend code generation.
void MLIRCPUBackend::codegen()
{
// Optimizations at nGraph dialect level run first, on the module as received.
optimizeNgDialect();
// Then the module is lowered out of the nGraph dialect.
lowerNgDialect();
}
/// Lowers the module from nGraph dialect down to LLVM dialect:
///   1. nGraph dialect -> Affine dialect (dialect lowering pass + canonicalizer),
///   2. Affine optimizations and lowering to Standard dialect (optimizeAffineDialect),
///   3. Loop/Standard dialect -> LLVM dialect (full dialect conversion).
/// Any failing step raises a check failure.
void MLIRCPUBackend::lowerNgDialect()
{
    // The module is dereferenced by every step below, so validate it up front.
    NGRAPH_CHECK(m_module, "MLIR module is not ready.");

    // Lower NG dialect to Affine
    mlir::PassManager pm(&m_context);
    pm.addPass(mlir::createDialectLoweringPass());
    pm.addPass(mlir::createCanonicalizerPass());

    // Apply any generic pass manager command line options.
    mlir::applyPassManagerCLOptions(pm);

    if (failed(pm.run(m_module.get())))
    {
        NGRAPH_CHECK(false, "MLIR pass manager failed");
    }

    if (failed(m_module->verify()))
    {
        NGRAPH_CHECK(false, "Incorrect module after dialect lowering");
    }

    optimizeAffineDialect();

    // Lower Standard dialect to LLVM dialect.
    mlir::LLVMTypeConverter llvmConverter(&m_context);

    mlir::OwningRewritePatternList patterns;
    mlir::populateLoopToStdConversionPatterns(patterns, &m_context);
    mlir::populateStdToLLVMConversionPatterns(llvmConverter, patterns);

    // Only LLVM dialect ops, the module ops and functions whose signature is legal for the
    // LLVM type converter may remain after the conversion.
    mlir::ConversionTarget target(m_context);
    target.addLegalDialect<mlir::LLVM::LLVMDialect>();
    target.addLegalOp<mlir::ModuleOp, mlir::ModuleTerminatorOp>();
    target.addDynamicallyLegalOp<mlir::FuncOp>(
        [&](mlir::FuncOp op) { return llvmConverter.isSignatureLegal(op.getType()); });

    auto result =
        mlir::applyFullConversion(m_module.get(), target, std::move(patterns), &llvmConverter);
    NGRAPH_CHECK(succeeded(result), "Standard to LLVM dialect conversion failed");

    dumpMlirModule("LLVM-IR Dialect Conversion", m_module.get());
}
// Receives affine dialect as input and applies affine and standard dialect based optimizations.
// Lowering from affine dialect to standard dialect happens along the way. Output consists of
// standard dialect only ops.
//
// Loop fusion and loop tiling are opt-in through their command line flags. The lowering from
// Affine to Standard dialect always runs. A pass manager failure raises a check failure.
void MLIRCPUBackend::optimizeAffineDialect()
{
    // Populate pass manager with affine dialect optimizations.
    mlir::PassManager pm(&m_context);
    if (clEnableAffineLoopFusion)
    {
        pm.addPass(mlir::createLoopFusionPass());
    }

    if (clEnableAffineLoopTiling)
    {
        // Create target transform info to obtain some target information to be used in MLIR
        // optimizations. This is a temporary attempt to retrieve some target information by
        // reusing LLVM TTI infra while MLIR does not have target model. The cache size is the
        // only consumer of this information, so it is only built when tiling is requested.
        llvm::LLVMContext llvmContext;
        auto module = std::unique_ptr<llvm::Module>(new llvm::Module("test", llvmContext));
        module->setDataLayout(targetMachine->createDataLayout());
        // TTI is queried per function, hence the placeholder function.
        auto ttiSetupFunc = llvm::cast<llvm::Function>(
            module
                ->getOrInsertFunction(
                    "__ngraph_tti_setup",
                    llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), {}))
                .getCallee());
        auto targetInfo = targetMachine->getTargetTransformInfo(*ttiSetupFunc);

        unsigned cacheLevelSize =
            getCacheLevelSize(targetInfo, clLoopTilingCacheLevel, clLoopTilingCacheSize);
        LLVM_DEBUG(llvm::dbgs() << "Enabling Affine Loop Tiling for cache level "
                                << clLoopTilingCacheLevel
                                << ": "
                                << cacheLevelSize
                                << " bytes.\n");
        pm.addPass(mlir::createLoopTilingPass(cacheLevelSize));
    }

    // Populate pass manager with affine dialect to Std dialect conversion.
    pm.addPass(mlir::createLowerAffinePass());

    // Apply any generic pass manager command line options.
    mlir::applyPassManagerCLOptions(pm);

    // Run pass manager passes.
    auto result = pm.run(m_module.get());
    NGRAPH_CHECK(succeeded(result), "Affine optimizations and conversion to Std dialect failed");

    // Run Std dialect optimizations.
    // TODO
}
void MLIRCPUBackend::optimizeNgDialect()
{
mlir::PassManager pm(&m_context);
mlir::applyPassManagerCLOptions(pm);
if (clEnableNgInPlaceMemoryOpt)
{
pm.addPass(mlir::createMemoryOptimizationPass());
}
if (failed(pm.run(m_module.get())))
{
NGRAPH_CHECK(false, "MLIR pass manager failed");
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#pragma once
#include <memory>
#include "contrib/mlir/backend/backend.hpp"
#include "llvm/Support/CodeGen.h"
#include "ngraph/check.hpp"
namespace llvm
{
class TargetMachine;
}
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
/// MLIR backend targeting the host CPU. init() must have been called before
/// any instance is created; both constructors enforce this.
class MLIRCPUBackend : public MLIRBackend
{
public:
    /// Global Initialization for all CPU backends
    static void init();

    /// Forwards `module` and `context` to the MLIRBackend base class.
    MLIRCPUBackend(mlir::OwningModuleRef& module, mlir::MLIRContext& context)
        : MLIRBackend(module, context)
    {
        NGRAPH_CHECK(initialized,
                     "Cannot instantiate CPU MLIR backend without initialization");
    }

    /// Forwards `moduleOp` and `context` to the MLIRBackend base class.
    MLIRCPUBackend(mlir::ModuleOp& moduleOp, mlir::MLIRContext& context)
        : MLIRBackend(moduleOp, context)
    {
        NGRAPH_CHECK(initialized,
                     "Cannot instantiate CPU MLIR backend without initialization");
    }

    /// Generates LLVM dialect from nGraph dialect, applying the CPU backend
    /// optimization passes on the way.
    void codegen() override;

    // --- State shared by all CPU backend instances ---

    /// JIT optimization level
    static llvm::CodeGenOpt::Level mlirOptLevel;

    /// LLVM target machine to be used by this MLIR compiler instance to
    /// retrieve information about target features.
    /// TODO: Note that, unfortunately, MLIR/OrcJIT execution engine creates
    /// its own target machine for compilation internally. This target
    /// machine is for non-JIT related stuff. We should change OrcJIT API so
    /// that we can pass an external target machine or configuration flags.
    /// TODO: Move target machine to external nGraph backend when multiple
    /// backends start to use MLIR.
    static std::unique_ptr<llvm::TargetMachine> targetMachine;

    /// Global initialization done for CPU backend
    static bool initialized;

private:
    /// Apply CPU specific optimizations at nGraph dialect level
    void optimizeNgDialect();

    /// Lowers nGraph dialect through Affine and Standard dialects down to
    /// LLVM dialect.
    void lowerNgDialect();

    /// Apply affine dialect optimizations
    void optimizeAffineDialect();
};
}
}
}
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
// NOTE: This file follows nGraph format style and MLIR naming convention since it does // NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. // not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "lowerer.hpp" #include "affine_lowerer.hpp"
#include "dialect/ops.hpp" #include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "dialect/type.hpp" #include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp" #include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h> #include <llvm/ADT/DenseSet.h>
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#pragma once #pragma once
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/core/compiler.hpp"
#include <mlir/Pass/Pass.h> #include <mlir/Pass/Pass.h>
......
...@@ -17,9 +17,9 @@ ...@@ -17,9 +17,9 @@
// NOTE: This file follows nGraph format style and MLIR naming convention since it does // NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. // not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/core/compiler.hpp"
#include "contrib/mlir/compiler/dialect/ops.hpp" #include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/compiler/dialect/type.hpp" #include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp" #include "ngraph/assertion.hpp"
...@@ -53,7 +53,7 @@ namespace ...@@ -53,7 +53,7 @@ namespace
{ {
m_inplaceOps = { m_inplaceOps = {
#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE}, #define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE},
#include "contrib/mlir/compiler/op_lowerers.inc" #include "contrib/mlir/backend/pass/op_lowerers.inc"
}; };
} }
void runOnFunction() override; void runOnFunction() override;
......
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Sources of the mlir_backend library. Paths are relative to this directory.
# The list order is kept as-is on purpose.
set(SRC
dialect/dialect.cpp
dialect/type.cpp
dialect/ops.cpp
compiler.cpp
lowerer.cpp
memory_manager.cpp
pass/mlir_subgraph_extraction.cpp
# Header listed next to its implementation file.
pass/mlir_subgraph_extraction.hpp
pass/memory_optimization.cpp
tools.cpp
)
# All of the above is built into a single shared library.
add_library(mlir_backend SHARED ${SRC})
# Translate the LLVM component names into library names; the result is stored
# in llvm_libs.
llvm_map_components_to_libnames(llvm_libs support core irreader)
# Link MLIR libs
# PRIVATE: these libraries are link dependencies of mlir_backend only and are
# not propagated to targets that link against it.
target_link_libraries(
mlir_backend PRIVATE
MLIRAnalysis
MLIREDSC
MLIRExecutionEngine
MLIRIR
MLIRLLVMIR
MLIRStandardToLLVM
MLIRParser
MLIRPass
MLIRTargetLLVMIR
MLIRTransforms
MLIRSupport
)
# Some libs need whole-archive linkage because of Globals static initialization.
#
# whole_archive_link(<target> <lib>...)
#   target : target whose LINK_FLAGS property is set (any previous value of
#            that property is replaced).
#   <lib>  : paths of the static archives that must be linked in completely.
function(whole_archive_link target)
    if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
        # -all_load takes no operand: the archives follow as ordinary linker
        # inputs, so each one has to be separated from the previous one by a
        # space (without it two archives fuse into one bogus path).
        set(link_flags "-Llib -Wl,-all_load")
        foreach(LIB ${ARGN})
            string(CONCAT link_flags "${link_flags}" " ${LIB}")
        endforeach()
    else()
        # GNU-style linkers: the archives are passed inside one comma-separated
        # -Wl, option, bracketed by --whole-archive / --no-whole-archive.
        set(link_flags "-Llib -Wl,--whole-archive,")
        foreach(LIB ${ARGN})
            string(CONCAT link_flags "${link_flags}" "${LIB},")
        endforeach()
        string(CONCAT link_flags "${link_flags}" "--no-whole-archive")
    endif()
    message(STATUS "MLIR Ops link flag: ${link_flags}")
    # Quoted so the flag string is always passed as exactly one property value.
    set_target_properties(${target} PROPERTIES LINK_FLAGS "${link_flags}")
endfunction()
# Link the MLIR Affine and Standard ops archives with whole-archive semantics.
# The archives are referenced by full path inside the LLVM build library dir.
whole_archive_link(mlir_backend
${LLVM_BUILD_LIBRARY_DIR}/libMLIRAffineOps.a
${LLVM_BUILD_LIBRARY_DIR}/libMLIRStandardOps.a
)
# Link LLVM libs
# llvm_libs is expected to have been filled by llvm_map_components_to_libnames.
target_link_libraries(
mlir_backend PRIVATE
${llvm_libs}
)
# Link ngraph
# PUBLIC: ngraph is also propagated to targets that link against mlir_backend.
target_link_libraries(mlir_backend PUBLIC ngraph)
# table-gen dialect ops
# include table-gen helpers
include(${LLVM_DIR}/TableGen.cmake)
# ngraph_tablegen(<ofn> <tablegen-args>...)
#   Forwards the output file name and all extra arguments to the `tablegen`
#   helper for the MLIR project, adding the MLIR source and binary include
#   directories, then appends the generated file (located in the current
#   binary dir) to TABLEGEN_OUTPUT in the caller's scope.
function(ngraph_tablegen ofn)
    tablegen(MLIR ${ofn} ${ARGN} "-I${MLIR_SRC_INCLUDE_PATH}" "-I${MLIR_BIN_INCLUDE_PATH}")
    list(APPEND TABLEGEN_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
    set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} PARENT_SCOPE)
endfunction()
# Name of the table-gen executable to run.
# NOTE(review): presumably read by the tablegen() helper - confirm.
set(MLIR_TABLEGEN_EXE mlir-tblgen)
# The .td file describing the nGraph dialect ops; input for the calls below.
set(LLVM_TARGET_DEFINITIONS dialect/ops.td)
# Generate op declarations and op definitions from the .td file.
ngraph_tablegen(ops.h.inc -gen-op-decls)
ngraph_tablegen(ops.cpp.inc -gen-op-defs)
# Wrap the generated files in a target and make mlir_backend depend on it so
# the .inc files are ordered before the library build.
add_public_tablegen_target(ngraph_ops_gen)
add_dependencies(mlir_backend ngraph_ops_gen)
# ngraph_tablegen records its outputs under CMAKE_CURRENT_BINARY_DIR, so that
# directory is added to the include path of mlir_backend.
target_include_directories(mlir_backend PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
install(TARGETS mlir_backend DESTINATION ${NGRAPH_INSTALL_LIB})
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
#include "compiler.hpp" #include "compiler.hpp"
#include "dialect/dialect.hpp" #include "ngraph_dialect/dialect.hpp"
#include "dialect/ops.hpp" #include "ngraph_dialect/ops.hpp"
#include "dialect/type.hpp" #include "ngraph_dialect/type.hpp"
#include "lowerer.hpp"
#include "ngraph/check.hpp" #include "ngraph/check.hpp"
#include "ngraph/descriptor/tensor.hpp" #include "ngraph/descriptor/tensor.hpp"
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
...@@ -46,8 +46,8 @@ ...@@ -46,8 +46,8 @@
#include "ngraph/op/subtract.hpp" #include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/index_reduction.hpp" #include "ngraph/op/util/index_reduction.hpp"
#include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type.hpp"
#include "pass/memory_optimization.hpp"
#include "tools.hpp" #include "contrib/mlir/utils.hpp"
#include <llvm/ADT/STLExtras.h> #include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/TargetTransformInfo.h> #include <llvm/Analysis/TargetTransformInfo.h>
...@@ -83,92 +83,15 @@ using llvm::ArrayRef; ...@@ -83,92 +83,15 @@ using llvm::ArrayRef;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::runtime::ngmlir; using namespace ngraph::runtime::ngmlir;
// *** Debug flags ***
static llvm::cl::opt<bool> clPrintIRAfterAll(
    "ngraph-print-ir-after-all",
    llvm::cl::init(false),
    llvm::cl::desc(
        "Print IR after transformation that are not implemented as passes in the MLIRCompiler. It "
        "complements MLIR -print-ir-after-all and LLVM -print-after-all flags"));

// *** Optimization flags ***
// All flags below are off (or neutral) by default.

// Adds the nGraph dialect in-place memory optimization pass.
static llvm::cl::opt<bool> clEnableNgInPlaceMemoryOpt(
    "ng-inplace-mem-opt",
    llvm::cl::init(false),
    llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass"));

// Adds the loop fusion pass to the Affine dialect pipeline.
static llvm::cl::opt<bool>
    clEnableAffineLoopFusion("ngraph-affine-loop-fusion",
                             llvm::cl::init(false),
                             llvm::cl::desc("Enable loop fusion optimization in Affine dialect"));

// Adds the loop tiling pass to the Affine dialect pipeline.
static llvm::cl::opt<bool>
    clEnableAffineLoopTiling("ngraph-affine-loop-tile",
                             llvm::cl::init(false),
                             llvm::cl::desc("Enable loop tiling optimization in Affine dialect"));

// Cache level whose size drives loop tiling; defaults to 2.
static llvm::cl::opt<unsigned>
    clLoopTilingCacheLevel("ngraph-affine-loop-tile-cache-level",
                           llvm::cl::init(2),
                           llvm::cl::desc("Cache level to which to apply affine loop tiling."));

// Explicit cache size for loop tiling; 0 means "query the target instead".
// The help text names the cache-level option exactly as it is registered above.
static llvm::cl::opt<unsigned> clLoopTilingCacheSize(
    "ngraph-affine-loop-tile-cache-size",
    llvm::cl::init(0),
    llvm::cl::desc(
        "Cache size to use in affine loop tiling. If not zero, it overrides the cache-size "
        "inferred from the host CPU for the cache level specified by "
        "-ngraph-affine-loop-tile-cache-level."));

// *** Debug flags ***
// The help text names the file name option exactly as it is registered below.
static llvm::cl::opt<bool>
    clDumpObjectFile("ngraph-dump-mlir-object-file",
                     llvm::cl::desc("Dump MLIR JITted-compiled object to file specified with "
                                    "-ngraph-mlir-object-filename (<input file>.o by default)."));

static llvm::cl::opt<std::string>
    clObjectFilename("ngraph-mlir-object-filename",
                     llvm::cl::desc("Dump MLIR JITted-compiled object to file jitted_mlir.o"));
#define COMPILE_OP_DECL(op_name) \ #define COMPILE_OP_DECL(op_name) \
createOp<op_name>(MLIRCompiler & compiler, const ngraph::Node* ngNode) createOp<op_name>(MLIRCompiler & compiler, const ngraph::Node* ngNode)
// Default optimization level. bool MLIRCompiler::initialized = false;
llvm::CodeGenOpt::Level MLIRCompiler::mlirOptLevel = llvm::CodeGenOpt::Level::Aggressive;
// Target machine will be properly initialized by `init_mlir`.
std::unique_ptr<llvm::TargetMachine> MLIRCompiler::targetMachine;
/// Creates target machine for current host. void MLIRCompiler::init()
// `optLevel` is interpreted as an llvm::CodeGenOpt::Level value:
//     None (-O0), Less (-O1), Default (-O2, -Os), Aggressive (-O3).
// Returns the target machine, or the error reported by host detection or by
// target machine creation.
static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
    createDefaultTargetMachine(unsigned optLevel)
{
    auto hostBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
    if (hostBuilder)
    {
        // Only the codegen optimization level is customized; relocation model and
        // code model are left at their default values.
        const auto codeGenLevel = static_cast<llvm::CodeGenOpt::Level>(optLevel);
        hostBuilder->setCodeGenOptLevel(codeGenLevel);
        return hostBuilder->createTargetMachine();
    }
    // Host detection failed: hand the error to the caller.
    return hostBuilder.takeError();
}
void MLIRCompiler::init_mlir()
{ {
// Mutex to safely initialize MLIR. // Mutex to safely initialize MLIR.
static std::mutex mlirInitMutex; static std::mutex mlirInitMutex;
static bool initialized = false;
std::unique_lock<std::mutex> lock(mlirInitMutex); std::unique_lock<std::mutex> lock(mlirInitMutex);
...@@ -181,21 +104,6 @@ void MLIRCompiler::init_mlir() ...@@ -181,21 +104,6 @@ void MLIRCompiler::init_mlir()
mlir::registerPassManagerCLOptions(); mlir::registerPassManagerCLOptions();
llvm::cl::ParseEnvironmentOptions("ngraph", "NGRAPH_MLIR_OPTIONS", ""); llvm::cl::ParseEnvironmentOptions("ngraph", "NGRAPH_MLIR_OPTIONS", "");
// Override default optimization level with macro value.
if (char* optLevelStr = std::getenv("NGRAPH_MLIR_OPT_LEVEL"))
{
unsigned clOptLevel = std::stoi(optLevelStr);
NGRAPH_CHECK(clOptLevel >= 0 && clOptLevel <= 3, "Invalid optimization level");
mlirOptLevel = (llvm::CodeGenOpt::Level)clOptLevel;
}
// Initialize LLVM targets and target machine for current host.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
auto expectedTargetMachine = createDefaultTargetMachine(mlirOptLevel);
NGRAPH_CHECK(expectedTargetMachine, "Invalid target machine");
targetMachine = std::move(*expectedTargetMachine);
initialized = true; initialized = true;
} }
} }
...@@ -203,15 +111,11 @@ void MLIRCompiler::init_mlir() ...@@ -203,15 +111,11 @@ void MLIRCompiler::init_mlir()
void MLIRCompiler::compile() void MLIRCompiler::compile()
{ {
buildNgDialectModule(); buildNgDialectModule();
optimizeNgDialect(); // Free MLIR function builder.
lowerNgDialect(); if (m_builder)
} {
m_builder.reset(nullptr);
void MLIRCompiler::run(std::vector<void*>& externalTensors) }
{
bindArguments(externalTensors);
execute();
cleanup();
} }
// Creates an MLIR module and function with nGraph dialect ops from the input CompiledKernel. // Creates an MLIR module and function with nGraph dialect ops from the input CompiledKernel.
...@@ -261,7 +165,7 @@ void MLIRCompiler::buildNgDialectModule() ...@@ -261,7 +165,7 @@ void MLIRCompiler::buildNgDialectModule()
NGRAPH_CHECK(false, "Invalid module after lowering to NG dialect"); NGRAPH_CHECK(false, "Invalid module after lowering to NG dialect");
} }
dumpMlirModule("nGraph Dialect Construction"); dumpMlirModule("nGraph Dialect Construction", m_module.get());
} }
template <typename T> template <typename T>
...@@ -349,136 +253,6 @@ MLIRCompiler::TensorInfo MLIRCompiler::getTensorValue(descriptor::Tensor* tensor ...@@ -349,136 +253,6 @@ MLIRCompiler::TensorInfo MLIRCompiler::getTensorValue(descriptor::Tensor* tensor
return it->second; return it->second;
} }
// Lowers nGraph dialect all the way to LLVM dialect and creates the execution engine:
//   1. nGraph dialect -> Affine dialect (dialect lowering pass + canonicalizer),
//   2. Affine optimizations and lowering to Standard dialect (optimize),
//   3. Loop/Standard dialect -> LLVM dialect (full dialect conversion),
//   4. creation of the MLIR execution engine, stored in m_engine.
// Any failing step raises a check failure.
void MLIRCompiler::lowerNgDialect()
{
    // The module is dereferenced by every step below, so validate it up front.
    NGRAPH_CHECK(m_module, "MLIR module is not ready.");

    // Lower NG dialect to Affine
    mlir::PassManager pm(&m_context);
    pm.addPass(mlir::createDialectLoweringPass());
    pm.addPass(mlir::createCanonicalizerPass());

    // Apply any generic pass manager command line options.
    mlir::applyPassManagerCLOptions(pm);

    if (failed(pm.run(m_module.get())))
    {
        NGRAPH_CHECK(false, "MLIR pass manager failed");
    }

    if (failed(m_module->verify()))
    {
        NGRAPH_CHECK(false, "Incorrect module after dialect lowering");
    }

    optimize();

    // Lower Standard dialect to LLVM dialect.
    mlir::LLVMTypeConverter llvmConverter(&m_context);

    mlir::OwningRewritePatternList patterns;
    mlir::populateLoopToStdConversionPatterns(patterns, &m_context);
    mlir::populateStdToLLVMConversionPatterns(llvmConverter, patterns);

    // Only LLVM dialect ops, the module ops and functions whose signature is legal for the
    // LLVM type converter may remain after the conversion.
    mlir::ConversionTarget target(m_context);
    target.addLegalDialect<mlir::LLVM::LLVMDialect>();
    target.addLegalOp<mlir::ModuleOp, mlir::ModuleTerminatorOp>();
    target.addDynamicallyLegalOp<mlir::FuncOp>(
        [&](mlir::FuncOp op) { return llvmConverter.isSignatureLegal(op.getType()); });

    auto result = applyFullConversion(*m_module, target, std::move(patterns), &llvmConverter);
    NGRAPH_CHECK(succeeded(result), "Standard to LLVM dialect conversion failed");

    dumpMlirModule("LLVM-IR Dialect Conversion");

    // Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we
    // don't run MLIR passes that were already run. We also pass a default transformer created with
    // the default or user-provided optimization level.
    auto llvmTransformer =
        mlir::makeOptimizingTransformer(mlirOptLevel, /*sizeLevel=*/0, targetMachine.get());
    auto maybeEngine = mlir::ExecutionEngine::create(m_module.get(), llvmTransformer, mlirOptLevel);
    NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine");
    m_engine = std::move(maybeEngine.get());
}
/// Returns the cache level size from `targetInfo` for the `cacheLevel` provided. If `userCacheSize`
/// is not zero, it returns `userCacheSize`.
///
/// Only cache levels 1 and 2 are supported; any other level is reported through
/// NGRAPH_UNREACHABLE. A check failure is raised when the target information does not provide
/// a size for the requested level.
static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
                                  unsigned cacheLevel,
                                  unsigned userCacheSize)
{
    // A user-provided size always wins over the value inferred from the target.
    if (userCacheSize)
    {
        return userCacheSize;
    }

    llvm::Optional<unsigned> optCacheLevelSize;
    switch (cacheLevel)
    {
    case 1:
        optCacheLevelSize = targetInfo.getCacheSize(llvm::TargetTransformInfo::CacheLevel::L1D);
        break;
    case 2:
        optCacheLevelSize = targetInfo.getCacheSize(llvm::TargetTransformInfo::CacheLevel::L2D);
        break;
    default:
        NGRAPH_UNREACHABLE("Unsupported cache level: ", cacheLevel, ". Only 1 and 2 are supported");
    }

    // The explanation is passed as the message argument (not &&-ed into the condition) so that
    // it is reported as the failure message.
    NGRAPH_CHECK(optCacheLevelSize.hasValue(), "Cache level size is not available in TTI");
    return optCacheLevelSize.getValue();
}
// Receives affine dialect as input and applies affine and standard dialect based optimizations.
// Lowering from affine dialect to standard dialect happens along the way. Output consists of
// standard dialect only ops.
//
// Loop fusion and loop tiling are opt-in through their command line flags. The lowering from
// Affine to Standard dialect always runs. A pass manager failure raises a check failure.
void MLIRCompiler::optimize()
{
    // Populate pass manager with affine dialect optimizations.
    mlir::PassManager pm(&m_context);
    if (clEnableAffineLoopFusion)
    {
        pm.addPass(mlir::createLoopFusionPass());
    }

    if (clEnableAffineLoopTiling)
    {
        // Create target transform info to obtain some target information to be used in MLIR
        // optimizations. This is a temporary attempt to retrieve some target information by
        // reusing LLVM TTI infra while MLIR does not have target model. The cache size is the
        // only consumer of this information, so it is only built when tiling is requested.
        llvm::LLVMContext llvmContext;
        auto module = std::unique_ptr<llvm::Module>(new llvm::Module("test", llvmContext));
        module->setDataLayout(targetMachine->createDataLayout());
        // TTI is queried per function, hence the placeholder function.
        auto ttiSetupFunc = llvm::cast<llvm::Function>(
            module
                ->getOrInsertFunction(
                    "__ngraph_tti_setup",
                    llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), {}))
                .getCallee());
        auto targetInfo = targetMachine->getTargetTransformInfo(*ttiSetupFunc);

        unsigned cacheLevelSize =
            getCacheLevelSize(targetInfo, clLoopTilingCacheLevel, clLoopTilingCacheSize);
        LLVM_DEBUG(llvm::dbgs() << "Enabling Affine Loop Tiling for cache level "
                                << clLoopTilingCacheLevel
                                << ": "
                                << cacheLevelSize
                                << " bytes.\n");
        pm.addPass(mlir::createLoopTilingPass(cacheLevelSize));
    }

    // Populate pass manager with affine dialect to Std dialect conversion.
    pm.addPass(mlir::createLowerAffinePass());

    // Apply any generic pass manager command line options.
    mlir::applyPassManagerCLOptions(pm);

    // Run pass manager passes.
    auto result = pm.run(m_module.get());
    NGRAPH_CHECK(succeeded(result), "Affine optimizations and conversion to Std dialect failed");

    // Run Std dialect optimizations.
    // TODO
}
// MLIR builders // MLIR builders
#define TI(x) std::type_index(typeid(x)) #define TI(x) std::type_index(typeid(x))
...@@ -703,130 +477,3 @@ mlir::Operation* MLIRCompiler::createIndexReduction(const ngraph::Node* ngNode) ...@@ -703,130 +477,3 @@ mlir::Operation* MLIRCompiler::createIndexReduction(const ngraph::Node* ngNode)
op->setAttr("axes", redAxesAttr); op->setAttr("axes", redAxesAttr);
return op; return op;
} }
void MLIRCompiler::optimizeNgDialect()
{
mlir::PassManager pm(&m_context);
mlir::applyPassManagerCLOptions(pm);
if (clEnableNgInPlaceMemoryOpt)
{
pm.addPass(mlir::createMemoryOptimizationPass());
}
if (failed(pm.run(m_module.get())))
{
NGRAPH_CHECK(false, "MLIR pass manager failed");
}
}
// Binds the externally allocated tensors to the arguments of the MLIR entry point "main".
// One invocation argument is allocated per external tensor and its MemRef descriptor is
// pointed at the tensor's memory. The tensor memory itself stays owned by the caller.
void MLIRCompiler::bindArguments(std::vector<void*>& externalTensors)
{
    NGRAPH_CHECK(m_module, "MLIR module is not ready.");

    mlir::FuncOp func = m_module->lookupSymbol<mlir::FuncOp>("main");
    NGRAPH_CHECK(func && !func.getBlocks().empty(), "Function not found");

    // The compiled kernel determines how many tensors the caller must provide.
    NGRAPH_CHECK(m_compiledKernel, "No compiled kernel set for compiler");
    NGRAPH_CHECK((m_compiledKernel->get_arguments().size() +
                  m_compiledKernel->get_kernel_outputs().size()) == externalTensors.size(),
                 "Number of arguments and outputs doesn't match number of tensors");

    // Remember the caller's tensor list; allocateMemrefArgs() sizes the argument list from it.
    m_externalTensors = &externalTensors;

    // Each invocation argument is a type-erased double pointer: it points to a pointer to a
    // StaticFloatMemRef, a struct holding the actual data pointer (see allocateMemrefArgs()).
    auto expectedArguments = allocateMemrefArgs();
    NGRAPH_CHECK(expectedArguments.size(), "Arguments can't be created");
    m_invokeArgs = std::move(expectedArguments);

    NGRAPH_CHECK(m_invokeArgs.size() == m_externalTensors->size(),
                 "Number of external tensors doesn't match number of function arguments");

    // Point every descriptor at the matching external tensor.
    size_t tensorIndex = 0;
    for (void* invokeArg : m_invokeArgs)
    {
        auto** descriptorSlot = reinterpret_cast<mlir::StaticFloatMemRef**>(invokeArg);
        (*descriptorSlot)->data = reinterpret_cast<float*>((*m_externalTensors)[tensorIndex]);
        ++tensorIndex;
    }
}
// Invokes the JIT-compiled entry point "main" through the MLIR execution engine using the
// arguments previously bound by bindArguments(). Optionally dumps the JIT-compiled object file.
// The nGraph check fails, carrying the reason reported by the engine, if the invocation fails.
void MLIRCompiler::execute()
{
    // For API uniformity reasons 'invoke' takes a list of type-erased pointers to arguments.
    // 'invoke' is overloaded with a parameter pack version, so the MutableArrayRef is built
    // explicitly to make sure that overload is selected.
    auto invocationResult = m_engine->invoke("main", llvm::MutableArrayRef<void*>(m_invokeArgs));

    if (clDumpObjectFile)
    {
        m_engine->dumpToObjectFile(clObjectFilename.empty() ? "jitted_mlir.o"
                                                            : clObjectFilename.getValue());
    }

    if (invocationResult)
    {
        // An llvm::Error must be consumed before it is destroyed. Converting it to a string
        // consumes it and keeps the engine's diagnostic in the failure message.
        const std::string reason = llvm::toString(std::move(invocationResult));
        NGRAPH_CHECK(false, "JIT invocation of 'main' failed: ", reason);
    }
}
// Releases the per-invocation argument list and the MLIR function builder.
// Only the argument slots and their MemRef descriptors are freed; the external tensor data
// the descriptors point to is not touched.
void MLIRCompiler::cleanup()
{
    for (auto* arg : m_invokeArgs)
    {
        auto* memRefArg = *(reinterpret_cast<mlir::StaticFloatMemRef**>(arg));
        free(memRefArg);
        free(arg);
    }
    // Drop the now dangling pointers so that a repeated cleanup() cannot free them twice.
    m_invokeArgs.clear();

    // Free MLIR function builder. Resetting an empty unique_ptr is a no-op.
    m_builder.reset();
}
// Allocates the invocation argument list, one argument per external tensor.
//
// The current call ABI takes a single arg pointer (argPtr) pointing to a list of args.
// Each arg is a pointer to a StaticFloatMemRef which contains a data pointer
//
// The args are laid out as follows
// argPtr-> arg[0]-> StaticFloatMemRef -> <data>
//          arg[1]-> StaticFloatMemRef -> <data>
//          ...
//
// Every slot and descriptor is allocated with malloc and has to be released with free. The
// descriptors are returned with a null data pointer.
SmallVector<void*, 8> MLIRCompiler::allocateMemrefArgs()
{
    NGRAPH_CHECK(m_externalTensors != nullptr, "External tensors are not set");

    const size_t numTensors = m_externalTensors->size();
    SmallVector<void*, 8> args;
    args.reserve(numTensors);

    for (size_t i = 0; i < numTensors; ++i)
    {
        auto* descriptor = allocateMemrefDescriptor();
        auto** arg =
            static_cast<mlir::StaticFloatMemRef**>(malloc(sizeof(mlir::StaticFloatMemRef*)));
        if (arg == nullptr)
        {
            // Do not leak the descriptor when its slot could not be allocated.
            free(descriptor);
        }
        NGRAPH_CHECK(arg != nullptr, "NULL MemRef argument");
        *arg = descriptor;
        args.push_back(arg);
    }
    return args;
}
// Allocates one StaticFloatMemRef descriptor with malloc and returns it with a null data
// pointer. The caller owns the descriptor and releases it with free.
mlir::StaticFloatMemRef* MLIRCompiler::allocateMemrefDescriptor()
{
    // StaticFloatMemRef is the only descriptor used because that's what MLIR currently offers.
    // This should be expanded with different types and dynamic MemRefs.
    void* rawStorage = malloc(sizeof(mlir::StaticFloatMemRef));
    auto* descriptor = static_cast<mlir::StaticFloatMemRef*>(rawStorage);
    NGRAPH_CHECK(descriptor != nullptr, "NULL MemRef descriptor");

    descriptor->data = nullptr;
    return descriptor;
}
// Dumps the compiler's MLIR module, preceded by a banner naming the step \p msg.
// Does nothing unless the print-IR-after-all command line option is set.
void MLIRCompiler::dumpMlirModule(const std::string msg)
{
    if (!clPrintIRAfterAll)
    {
        return;
    }

    auto& debugStream = llvm::dbgs();
    debugStream << "*** IR Dump After " << msg << " ***\n";
    m_module->dump();
    debugStream << "\n\n";
}
...@@ -19,10 +19,11 @@ ...@@ -19,10 +19,11 @@
#pragma once #pragma once
#include "memory_manager.hpp" #include "contrib/mlir/runtime/cpu/memory_manager.hpp"
#include "ngraph/check.hpp"
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/MemRefUtils.h> #include <mlir/ExecutionEngine/MemRefUtils.h>
#include <mlir/IR/Builders.h> #include <mlir/IR/Builders.h>
#include <mlir/IR/Module.h> #include <mlir/IR/Module.h>
...@@ -32,11 +33,6 @@ ...@@ -32,11 +33,6 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
namespace llvm
{
class TargetMachine;
}
namespace ngraph namespace ngraph
{ {
namespace descriptor namespace descriptor
...@@ -55,30 +51,34 @@ namespace ngraph ...@@ -55,30 +51,34 @@ namespace ngraph
{ {
namespace ngmlir namespace ngmlir
{ {
/// This class is the entry point to MLIR from nGraph. It drives the conversion of /// MLIR Compiler. Given an nGraph sub-graph, represented as CompiledKernel node, it
/// nGraph sub-graphs, represented with CompiledKernel nodes, to MLIR nGraph dialect /// translates the graph down to nGraph dialect and applies core optimizations.
/// and its lowering, optimization and execution using LLVM-based MLIR execution engine. ///
/// The compiler owns the MLIR module until compilation is done. After that,
/// the module can be grabbed and plugged into MLIR backends.
class MLIRCompiler class MLIRCompiler
{ {
public: public:
/// Initializes MLIR environment. It must be called only once per execution. /// Initializes MLIR environment. It must be called only once.
static void init_mlir(); static void init();
public: public:
using TensorList = std::vector<descriptor::Tensor*>; using TensorList = std::vector<descriptor::Tensor*>;
using TypeList = llvm::SmallVector<mlir::Type, 4>; using TypeList = llvm::SmallVector<mlir::Type, 4>;
MLIRCompiler(const ngraph::op::CompiledKernel* compiled_kernel) MLIRCompiler(const ngraph::op::CompiledKernel* compiled_kernel,
mlir::MLIRContext& context)
: m_compiledKernel(compiled_kernel) : m_compiledKernel(compiled_kernel)
, m_context(context)
{ {
NGRAPH_CHECK(initialized,
"Cannot instantiate a compiler without initializing MLIR");
} }
/// Compiles a subgraph with MLIR /// Compiles a subgraph with MLIR
void compile(); void compile();
/// Executes a pre-compiled subgraph mlir::OwningModuleRef& get_module() { return m_module; }
void run(std::vector<void*>& externalTensors);
private: private:
struct TensorInfo struct TensorInfo
{ {
...@@ -87,13 +87,11 @@ namespace ngraph ...@@ -87,13 +87,11 @@ namespace ngraph
}; };
private: private:
// Converts an nGraph sub-graph to MLIR nGraph dialect.
void buildNgDialectModule(); void buildNgDialectModule();
void lowerNgDialect(); void buildNgDialect();
void optimizeNgDialect(); // Applies any nGraph dialect optimizations
void optimize(); void optimizeNgDialect() { /*TODO: Add Core NG dialect optimizations */}
void bindArguments(std::vector<void*>& externalTensors);
void execute();
void cleanup();
mlir::Type getMlirType(const descriptor::Tensor* tensor); mlir::Type getMlirType(const descriptor::Tensor* tensor);
mlir::Type getMlirType(const element::Type& type); mlir::Type getMlirType(const element::Type& type);
...@@ -102,8 +100,6 @@ namespace ngraph ...@@ -102,8 +100,6 @@ namespace ngraph
TensorInfo getTensorValue(descriptor::Tensor* tensor); TensorInfo getTensorValue(descriptor::Tensor* tensor);
void updateTensorValue(descriptor::Tensor* tensor, mlir::Value* value); void updateTensorValue(descriptor::Tensor* tensor, mlir::Value* value);
void buildNgDialect();
template <typename Op> template <typename Op>
static mlir::Operation* createOp(MLIRCompiler& compiler, const ngraph::Node* ngNode) static mlir::Operation* createOp(MLIRCompiler& compiler, const ngraph::Node* ngNode)
{ {
...@@ -121,15 +117,6 @@ namespace ngraph ...@@ -121,15 +117,6 @@ namespace ngraph
void createReturn(); void createReturn();
/// Helper to create memref arguments for MLIR function signature
llvm::SmallVector<void*, 8> allocateMemrefArgs();
/// Helper to allocate a mem ref object. Handles static shapes only for now.
mlir::StaticFloatMemRef* allocateMemrefDescriptor();
/// Helper to dump MLIR module into llvm::dbgs prepended by the message \p msg.
void dumpMlirModule(const std::string msg);
/// Converts nGraph shape-like types \p ng_shape to MLIR shape \p mlir_shape. /// Converts nGraph shape-like types \p ng_shape to MLIR shape \p mlir_shape.
template <typename T> template <typename T>
void getMlirShape(T ngShape, llvm::SmallVectorImpl<int64_t>& mlirShape); void getMlirShape(T ngShape, llvm::SmallVectorImpl<int64_t>& mlirShape);
...@@ -142,19 +129,12 @@ namespace ngraph ...@@ -142,19 +129,12 @@ namespace ngraph
// Sub-graph to be compiled and executed with MLIR. // Sub-graph to be compiled and executed with MLIR.
const ngraph::op::CompiledKernel* m_compiledKernel; const ngraph::op::CompiledKernel* m_compiledKernel;
// Pointers to externally allocated memory for sub-graph's input and output tensors.
std::vector<void*>* m_externalTensors;
// Arguments for the MLIR function generated for the nGraph sub-graph.
llvm::SmallVector<void*, 8> m_invokeArgs;
// MLIR context that holds all the MLIR information related to the sub-graph // MLIR context that holds all the MLIR information related to the sub-graph
// compilation. // compilation.
mlir::MLIRContext m_context; mlir::MLIRContext& m_context;
mlir::OwningModuleRef m_module; mlir::OwningModuleRef m_module;
std::unique_ptr<mlir::OpBuilder> m_builder; std::unique_ptr<mlir::OpBuilder> m_builder;
std::unique_ptr<mlir::ExecutionEngine> m_engine;
using TensorToInfo = std::pair<descriptor::Tensor*, TensorInfo>; using TensorToInfo = std::pair<descriptor::Tensor*, TensorInfo>;
using TensorToInfoMap = std::unordered_map<descriptor::Tensor*, TensorInfo>; using TensorToInfoMap = std::unordered_map<descriptor::Tensor*, TensorInfo>;
...@@ -166,26 +146,8 @@ namespace ngraph ...@@ -166,26 +146,8 @@ namespace ngraph
// use for MLIR dialect gen // use for MLIR dialect gen
TensorToInfoMap m_tensorToValueMap; TensorToInfoMap m_tensorToValueMap;
static const MLIRCompOpMap opDispatcher; static const MLIRCompOpMap opDispatcher;
// Global initialization for MLIR compiler
// Optimization level used by MLIR and LLVM compilers. It's based on LLVM CG static bool initialized;
// optimization levels:
// enum Level {
// None, // -O0
// Less, // -O1
// Default, // -O2, -Os
// Aggressive // -O3
// };
static llvm::CodeGenOpt::Level mlirOptLevel;
// LLVM target machine to be used by this MLIR compiler instance to retrieve
// information about target features.
// TODO: Note that, unfortunately, MLIR/OrcJIT execution engine creates its own
// target machine for compilation internally. This target machine is for non-JIT
// related stuff. We should change OrcJIT API so that we can pass an external target
// machine or configuration flags.
// TODO: Move target machine to external nGraph backend when multiple backends start
// to use MLIR.
static std::unique_ptr<llvm::TargetMachine> targetMachine;
}; };
} }
} }
......
...@@ -549,5 +549,5 @@ void MLIRSubgraphExtractionPass::clean_up() ...@@ -549,5 +549,5 @@ void MLIRSubgraphExtractionPass::clean_up()
const std::set<std::type_index> MLIRSubgraphExtractionPass::m_supported_ops{ const std::set<std::type_index> MLIRSubgraphExtractionPass::m_supported_ops{
#define MLIR_OP(OP) TI(ngraph::op::OP), #define MLIR_OP(OP) TI(ngraph::op::OP),
#include "contrib/mlir/compiler/ops_supported.inc" #include "contrib/mlir/core/ops_supported.inc"
}; };
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#include "cpu_runtime.hpp"
#include "contrib/mlir/backend/cpu/cpu_backend.hpp"
#include "ngraph/check.hpp"
#include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/ErrorOr.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Target/TargetMachine.h>
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/MemRefUtils.h>
#include <mlir/ExecutionEngine/OptUtils.h>
#include <mlir/IR/Function.h>
using llvm::SmallVector;
using llvm::StringRef;
using llvm::ArrayRef;
using namespace ngraph;
using namespace ngraph::runtime::ngmlir;
#define DEBUG_TYPE "mlir-cpu-runtime"
// Command line option that enables dumping the JIT-compiled object after an invocation.
static llvm::cl::opt<bool>
    clDumpObjectFile("ngraph-dump-mlir-object-file",
                     llvm::cl::desc("Dump MLIR JITted-compiled object to file specified with "
                                    "-ngraph-mlir-object-filename (jitted_mlir.o by default)."));

// Command line option naming the object file to write; when left empty, "jitted_mlir.o" is used.
static llvm::cl::opt<std::string> clObjectFilename(
    "ngraph-mlir-object-filename",
    llvm::cl::desc("Name of the file the MLIR JITted-compiled object is dumped to when "
                   "-ngraph-dump-mlir-object-file is set (jitted_mlir.o by default)."));
void MLIRCPURuntime::run(void* args)
{
run_internal(*reinterpret_cast<std::vector<void*>*>(args));
}
// Runs the module's entry point on the given external tensors: makes sure an execution engine
// exists, binds the tensors to the invocation arguments, invokes the code and releases the
// arguments again.
void MLIRCPURuntime::run_internal(std::vector<void*>& externalTensors)
{
    // Create the MLIR execution engine on the first invocation only and reuse it afterwards, so
    // that a cached runtime does not JIT-compile its module again on every run.
    // NOTE(review): this assumes the module is not replaced through set_module() after the first
    // run; if that ever happens the engine has to be reset as well.
    if (!m_engine)
    {
        // We use a null MLIR pass manager for now to make sure we don't run MLIR passes that
        // were already run. We also pass a default transformer created with the default or
        // user-provided optimization level.
        auto llvmTransformer = mlir::makeOptimizingTransformer(
            MLIRCPUBackend::mlirOptLevel, /*sizeLevel=*/0, MLIRCPUBackend::targetMachine.get());
        auto maybeEngine = mlir::ExecutionEngine::create(
            m_module.get(), llvmTransformer, MLIRCPUBackend::mlirOptLevel);
        NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine");
        m_engine = std::move(maybeEngine.get());
    }

    bindArguments(externalTensors);
    try
    {
        execute();
    }
    catch (...)
    {
        // The arguments were freshly allocated by bindArguments(); release them before
        // propagating the failure so that they are not leaked.
        cleanup();
        throw;
    }
    cleanup();
}
// Binds the externally allocated tensors to the arguments of the MLIR entry point "main".
// One invocation argument is allocated per external tensor and its MemRef descriptor is
// pointed at the tensor's memory. The tensor memory itself stays owned by the caller.
void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
{
    NGRAPH_CHECK(m_module, "MLIR module is not ready.");

    mlir::FuncOp func = m_module->lookupSymbol<mlir::FuncOp>("main");
    NGRAPH_CHECK(func && !func.getBlocks().empty(), "Function not found");

    // Remember the caller's tensor list; allocateMemrefArgs() sizes the argument list from it.
    m_externalTensors = &externalTensors;

    // Each invocation argument is a type-erased double pointer: it points to a pointer to a
    // StaticFloatMemRef, a struct holding the actual data pointer (see allocateMemrefArgs()).
    auto expectedArguments = allocateMemrefArgs();
    NGRAPH_CHECK(expectedArguments.size(), "Arguments can't be created");
    m_invokeArgs = std::move(expectedArguments);

    NGRAPH_CHECK(m_invokeArgs.size() == m_externalTensors->size(),
                 "Number of external tensors doesn't match number of function arguments");

    // Point every descriptor at the matching external tensor.
    size_t tensorIndex = 0;
    for (void* invokeArg : m_invokeArgs)
    {
        auto** descriptorSlot = reinterpret_cast<mlir::StaticFloatMemRef**>(invokeArg);
        (*descriptorSlot)->data = reinterpret_cast<float*>((*m_externalTensors)[tensorIndex]);
        ++tensorIndex;
    }
}
// Invokes the JIT-compiled entry point "main" through the MLIR execution engine using the
// arguments previously bound by bindArguments(). Optionally dumps the JIT-compiled object file.
// The nGraph check fails, carrying the reason reported by the engine, if the invocation fails.
void MLIRCPURuntime::execute()
{
    // For API uniformity reasons 'invoke' takes a list of type-erased pointers to arguments.
    // 'invoke' is overloaded with a parameter pack version, so the MutableArrayRef is built
    // explicitly to make sure that overload is selected.
    auto invocationResult = m_engine->invoke("main", llvm::MutableArrayRef<void*>(m_invokeArgs));

    if (clDumpObjectFile)
    {
        m_engine->dumpToObjectFile(clObjectFilename.empty() ? "jitted_mlir.o"
                                                            : clObjectFilename.getValue());
    }

    if (invocationResult)
    {
        // An llvm::Error must be consumed before it is destroyed. Converting it to a string
        // consumes it and keeps the engine's diagnostic in the failure message.
        const std::string reason = llvm::toString(std::move(invocationResult));
        NGRAPH_CHECK(false, "JIT invocation of 'main' failed: ", reason);
    }
}
// Releases the per-invocation argument list.
// Only the argument slots and their MemRef descriptors are freed; the external tensor data
// the descriptors point to is not touched.
void MLIRCPURuntime::cleanup()
{
    for (auto* arg : m_invokeArgs)
    {
        auto* memRefArg = *(reinterpret_cast<mlir::StaticFloatMemRef**>(arg));
        free(memRefArg);
        free(arg);
    }
    // Drop the now dangling pointers so that a repeated cleanup() cannot free them twice.
    m_invokeArgs.clear();
}
// Allocates the invocation argument list, one argument per external tensor.
//
// The current call ABI takes a single arg pointer (argPtr) pointing to a list of args.
// Each arg is a pointer to a StaticFloatMemRef which contains a data pointer
//
// The args are laid out as follows
// argPtr-> arg[0]-> StaticFloatMemRef -> <data>
//          arg[1]-> StaticFloatMemRef -> <data>
//          ...
//
// Every slot and descriptor is allocated with malloc and has to be released with free. The
// descriptors are returned with a null data pointer.
SmallVector<void*, 8> MLIRCPURuntime::allocateMemrefArgs()
{
    NGRAPH_CHECK(m_externalTensors != nullptr, "External tensors are not set");

    const size_t numTensors = m_externalTensors->size();
    SmallVector<void*, 8> args;
    args.reserve(numTensors);

    for (size_t i = 0; i < numTensors; ++i)
    {
        auto* descriptor = allocateMemrefDescriptor();
        auto** arg =
            static_cast<mlir::StaticFloatMemRef**>(malloc(sizeof(mlir::StaticFloatMemRef*)));
        if (arg == nullptr)
        {
            // Do not leak the descriptor when its slot could not be allocated.
            free(descriptor);
        }
        NGRAPH_CHECK(arg != nullptr, "NULL MemRef argument");
        *arg = descriptor;
        args.push_back(arg);
    }
    return args;
}
// Allocates one StaticFloatMemRef descriptor with malloc and returns it with a null data
// pointer. The caller owns the descriptor and releases it with free.
mlir::StaticFloatMemRef* MLIRCPURuntime::allocateMemrefDescriptor()
{
    // StaticFloatMemRef is the only descriptor used because that's what MLIR currently offers.
    // This should be expanded with different types and dynamic MemRefs.
    void* rawStorage = malloc(sizeof(mlir::StaticFloatMemRef));
    auto* descriptor = static_cast<mlir::StaticFloatMemRef*>(rawStorage);
    NGRAPH_CHECK(descriptor != nullptr, "NULL MemRef descriptor");

    descriptor->data = nullptr;
    return descriptor;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#pragma once
#include <memory>
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/MemRefUtils.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/Module.h>
#include <mlir/IR/Types.h>
#include "contrib/mlir/backend/backend.hpp"
#include "contrib/mlir/runtime/runtime.hpp"
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
/// A CPU runtime is an MLIR runtime that owns an MLIR context and a module.
/// The module should be in LLVM dialect and ready to be lowered via an MLIR
/// ExecutionEngine. The runtime owns the context and must outlive any MLIR
/// code compilation and execution.
class MLIRCPURuntime : public MLIRRuntime
{
public:
    /// Executes a pre-compiled subgraph.
    /// \param args Type-erased pointer to a std::vector<void*> holding the externally
    ///             allocated input and output tensors.
    void run(void* args) override;

private:
    // Runs the module on the given external tensors: bind, execute, clean up.
    void run_internal(std::vector<void*>& externalTensors);

    // Bind external tensors to MLIR module entry point
    void bindArguments(std::vector<void*>& externalTensors);

    // Invokes an MLIR module entry point with bound arguments
    void execute();

    // Cleans up allocated args
    void cleanup();

    /// Helper to create memref arguments for MLIR function signature
    llvm::SmallVector<void*, 8> allocateMemrefArgs();

    /// Helper to allocate a mem ref object. Handles static shapes only for now.
    mlir::StaticFloatMemRef* allocateMemrefDescriptor();

private:
    // Pointers to externally allocated memory for sub-graph's input and output tensors.
    // Not owned; null until tensors are bound, so that it never holds an indeterminate value.
    std::vector<void*>* m_externalTensors = nullptr;

    // Arguments for the MLIR function generated for the nGraph sub-graph.
    llvm::SmallVector<void*, 8> m_invokeArgs;

    // Execution engine used to invoke the module's entry point.
    std::unique_ptr<mlir::ExecutionEngine> m_engine;
};
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#pragma once
#include "contrib/mlir/backend/backend.hpp"
#include <memory>
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/MemRefUtils.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/Module.h>
#include <mlir/IR/Types.h>
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
/// Base class for an MLIR runtime. An MLIR runtime owns the MLIR Context and owns
/// the final compiled module. It supports invoking the module with specific arguments
class MLIRRuntime
{
public:
    /// Virtual destructor: the class is a polymorphic base, so derived runtimes must be
    /// destroyed correctly when deleted through a base pointer.
    virtual ~MLIRRuntime() = default;

    /// Sets the MLIR module that this runtime will own. Ownership is moved out of
    /// \p module, which is left in a moved-from state.
    void set_module(mlir::OwningModuleRef& module) { m_module = std::move(module); }

    /// Overload with module op
    void set_module(mlir::ModuleOp& module) { m_module = module; }

    /// Executes a pre-compiled subgraph
    /// \param args Type-erased pointer to the invocation arguments; its concrete type is
    ///             defined by the derived runtime.
    virtual void run(void* args) = 0;

    /// Get the MLIR module that this runtime owns
    mlir::OwningModuleRef& get_module() { return m_module; }

    /// Get the MLIR context that this runtime owns
    mlir::MLIRContext& get_context() { return m_context; }

protected:
    // The context is declared before the module on purpose: members are destroyed in reverse
    // declaration order, so the module is released while the context still exists.
    mlir::MLIRContext m_context;
    mlir::OwningModuleRef m_module;
};
}
}
}
\ No newline at end of file
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
/// small sequence of passes without running the whole compiler pipeline. Please, refer to /// small sequence of passes without running the whole compiler pipeline. Please, refer to
/// ngraph_repo_path/tests/mlir/ for examples. /// ngraph_repo_path/tests/mlir/ for examples.
#include "contrib/mlir/compiler/tools.hpp" #include "contrib/mlir/utils.hpp"
#include "ngraph/check.hpp" #include "ngraph/check.hpp"
#include <llvm/Support/CommandLine.h> #include <llvm/Support/CommandLine.h>
......
...@@ -17,13 +17,32 @@ ...@@ -17,13 +17,32 @@
// NOTE: This file follows nGraph format style and MLIR naming convention since it does // NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. // not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "tools.hpp" #include "utils.hpp"
#include "dialect/dialect.hpp" #include "contrib/mlir/core/ngraph_dialect/dialect.hpp"
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/Debug.h>
#include <mlir/IR/Dialect.h> #include <mlir/IR/Dialect.h>
// Command line flag "-ngraph-print-ir-after-all" (off by default). It is read by
// dumpMlirModule() in this file, which only prints the module when the flag is set.
static llvm::cl::opt<bool> clPrintIRAfterAll(
"ngraph-print-ir-after-all",
llvm::cl::init(false),
llvm::cl::desc(
"Print IR after transformation that are not implemented as passes in the MLIRCompiler. It "
"complements MLIR -print-ir-after-all and LLVM -print-after-all flags"));
void ngraph::runtime::ngmlir::initializeNGraphMLIR() void ngraph::runtime::ngmlir::initializeNGraphMLIR()
{ {
mlir::registerDialect<mlir::NGraphOpsDialect>(); mlir::registerDialect<mlir::NGraphOpsDialect>();
} }
// Dumps \p module, preceded by a banner naming the step \p msg.
// Does nothing unless the "-ngraph-print-ir-after-all" command line option is set.
void ngraph::runtime::ngmlir::dumpMlirModule(const std::string msg, mlir::ModuleOp module)
{
    if (!clPrintIRAfterAll)
    {
        return;
    }

    auto& debugStream = llvm::dbgs();
    debugStream << "*** IR Dump After " << msg << " ***\n";
    module.dump();
    debugStream << "\n\n";
}
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
// NOTE: This file follows nGraph format style and MLIR naming convention since it does // NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. // not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#pragma once #pragma once
#include <mlir/IR/Module.h>
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
...@@ -29,6 +30,8 @@ namespace ngraph ...@@ -29,6 +30,8 @@ namespace ngraph
/// require nGraph dialect initialization. /// require nGraph dialect initialization.
void initializeNGraphMLIR(); void initializeNGraphMLIR();
/// Helper to dump MLIR module into llvm::dbgs prepended by the message \p msg.
void dumpMlirModule(const std::string msg, mlir::ModuleOp module);
} // namespace ngmlir } // namespace ngmlir
} // namespace runtime } // namespace runtime
} // namespace ngraph } // namespace ngraph
...@@ -272,7 +272,7 @@ if (NGRAPH_CPU_ENABLE) ...@@ -272,7 +272,7 @@ if (NGRAPH_CPU_ENABLE)
# TODO: can we get away without LLVM/MLIR include path. # TODO: can we get away without LLVM/MLIR include path.
# Currently mlir backend compiler.hpp include LLVM/MLIR files # Currently mlir backend compiler.hpp include LLVM/MLIR files
get_directory_property(MLIR_LLVM_INCLUDEPATH get_directory_property(MLIR_LLVM_INCLUDEPATH
DIRECTORY ${NGRAPH_MLIR_SOURCE_DIR}/compiler DIRECTORY ${NGRAPH_MLIR_SOURCE_DIR}
DEFINITION MLIR_LLVM_INCLUDEPATH) DEFINITION MLIR_LLVM_INCLUDEPATH)
message(STATUS "Building CPU backend with MLIR") message(STATUS "Building CPU backend with MLIR")
......
...@@ -16,7 +16,9 @@ ...@@ -16,7 +16,9 @@
#include "ngraph/runtime/cpu/cpu_builder.hpp" #include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/backend/cpu/cpu_backend.hpp"
#include "contrib/mlir/core/compiler.hpp"
#include "contrib/mlir/runtime/cpu/cpu_runtime.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp" #include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp" #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
...@@ -68,22 +70,41 @@ namespace ngraph ...@@ -68,22 +70,41 @@ namespace ngraph
// Compile nodes within the CompiledKernel op. // Compile nodes within the CompiledKernel op.
CompiledKernel* compiled_kernel = CompiledKernel* compiled_kernel =
static_cast<CompiledKernel*>(const_cast<Node*>(node)); static_cast<CompiledKernel*>(const_cast<Node*>(node));
bool is_module_ready = true;
auto it = ctx->mlir_compilers.find(compiled_kernel);
if (it == ctx->mlir_compilers.end()) auto it = ctx->mlir_runtimes.find(compiled_kernel);
{
// create a new compiler for the CK
ctx->mlir_compilers.emplace(compiled_kernel, compiled_kernel);
is_module_ready = false;
}
MLIRCompiler& mlir_compiler = ctx->mlir_compilers.find(compiled_kernel)->second; if (it == ctx->mlir_runtimes.end())
if (!is_module_ready)
{ {
// Compile the sub-graph and create a new runtime
// We must create an MLIRContext that outlives the compilation/execution.
// The runtime contains the context and gets stored in the CK cache.
// Runtime contains context and must be constructed in-place.
// MLIR contexts cannot be copied over
ctx->mlir_runtimes.emplace(std::piecewise_construct,
std::make_tuple(compiled_kernel),
std::make_tuple());
MLIRCPURuntime& mlir_runtime =
ctx->mlir_runtimes.find(compiled_kernel)->second;
// Grab the context and initialize a core compiler
mlir::MLIRContext& context = mlir_runtime.get_context();
MLIRCompiler mlir_compiler(compiled_kernel, context);
// Compile to NG dialect
mlir_compiler.compile(); mlir_compiler.compile();
// Grab a context and initialize a CPU backend using same context
MLIRCPUBackend mlir_backend(mlir_compiler.get_module(), context);
// Codegen to LLVM dialect
mlir_backend.codegen();
// Store module into runtime, and invoke.
mlir_runtime.set_module(mlir_backend.get_module());
mlir_runtime.run(&ptr_args);
}
else
{
// We have found a cached runtime, just invoke.
MLIRCPURuntime& mlir_runtime = it->second;
mlir_runtime.run(&ptr_args);
} }
mlir_compiler.run(ptr_args);
}; };
functors.emplace_back(functor); functors.emplace_back(functor);
......
...@@ -32,7 +32,8 @@ ...@@ -32,7 +32,8 @@
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#ifdef NGRAPH_MLIR_ENABLE #ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/backend/cpu/cpu_backend.hpp"
#include "contrib/mlir/core/compiler.hpp"
#endif #endif
using namespace ngraph; using namespace ngraph;
...@@ -95,7 +96,9 @@ shared_ptr<runtime::Executable> ...@@ -95,7 +96,9 @@ shared_ptr<runtime::Executable>
if (std::getenv("NGRAPH_MLIR") != nullptr) if (std::getenv("NGRAPH_MLIR") != nullptr)
{ {
// Initialize MLIR compiler // Initialize MLIR compiler
ngmlir::MLIRCompiler::init_mlir(); ngmlir::MLIRCompiler::init();
// Initialize MLIR backend
ngmlir::MLIRCPUBackend::init();
} }
#endif #endif
......
...@@ -112,7 +112,7 @@ ...@@ -112,7 +112,7 @@
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#ifdef NGRAPH_MLIR_ENABLE #ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/core/compiler.hpp"
#endif #endif
using namespace std; using namespace std;
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#endif #endif
#ifdef NGRAPH_MLIR_ENABLE #ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler/pass/mlir_subgraph_extraction.hpp" #include "contrib/mlir/core/pass/mlir_subgraph_extraction.hpp"
#endif #endif
#include "ngraph/descriptor/input.hpp" #include "ngraph/descriptor/input.hpp"
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include "ngraph/op/experimental/compiled_kernel.hpp" #include "ngraph/op/experimental/compiled_kernel.hpp"
#ifdef NGRAPH_MLIR_ENABLE #ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler/compiler.hpp" #include "contrib/mlir/runtime/cpu/cpu_runtime.hpp"
#endif #endif
namespace mkldnn namespace mkldnn
...@@ -84,8 +84,8 @@ namespace ngraph ...@@ -84,8 +84,8 @@ namespace ngraph
/// The MLIR compiler caches the compiled code on the first invocation, /// The MLIR compiler caches the compiled code on the first invocation,
/// and may in the future support re-compilation /// and may in the future support re-compilation
std::unordered_map<ngraph::op::CompiledKernel*, std::unordered_map<ngraph::op::CompiledKernel*,
ngraph::runtime::ngmlir::MLIRCompiler> ngraph::runtime::ngmlir::MLIRCPURuntime>
mlir_compilers; mlir_runtimes;
#endif #endif
}; };
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment