Commit 79283e3e authored by Diego Caballero, committed by Scott Cyphers

[MLIR] Enable LLVM vectorization by initializing TTI (#3424)

* [MLIR] Bump MLIR repo to commit c636f12, 08/09/2019

MLIR Commit:
commit c636f127ee412ef7279ec0d550f42740824cd9ea
Author: Alex Zinenko <zinenko@google.com>
Date:   Fri Aug 9 08:59:45 2019 -0700

    LLVM dialect and translation: support global strings

LLVM Commit:
commit c636f127ee412ef7279ec0d550f42740824cd9ea
Author: Alex Zinenko <zinenko@google.com>
Date:   Fri Aug 9 08:59:45 2019 -0700

    LLVM dialect and translation: support global strings

* [MLIR] Set optimization level for LLVM optimizer and codegen

Now both the LLVM optimizer and codegen are aligned with the
"NGRAPH_MLIR_OPT_LEVEL" macro.

* [MLIR] Enable LLVM vectorization by initializing TTI

This is the final piece needed to enable LLVM vectorization for the MLIR compiler.
The PR refactors the creation of the target machine in MLIRCompiler so that
it can be used to initialize TargetTransformInfo with the proper host
features, allowing the LLVM Loop Vectorizer to get the right vector register
information for the target CPU.
parent 83611106
...@@ -110,6 +110,31 @@ static llvm::cl::opt<unsigned> clLoopTilingCacheSize( ...@@ -110,6 +110,31 @@ static llvm::cl::opt<unsigned> clLoopTilingCacheSize(
// Default optimization level. // Default optimization level.
unsigned MLIRCompiler::mlir_opt_level = 2; unsigned MLIRCompiler::mlir_opt_level = 2;
// Target machine will be properly initialized by `init_mlir`.
std::unique_ptr<llvm::TargetMachine> MLIRCompiler::target_machine;
/// Creates a target machine for the current host.
///
/// \param opt_level Optimization level, converted directly to llvm::CodeGenOpt::Level. No range
///                  check is performed here; the value is expected to be in [0, 3].
/// \return The created target machine, or the error produced while detecting the host or while
///         creating the target machine.
static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
createDefaultTargetMachine(unsigned opt_level)
{
    auto machineBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
    if (!machineBuilder)
    {
        // Propagate the host detection failure to the caller.
        return machineBuilder.takeError();
    }

    // Relocation model and code model are kept to default values. CodeGen optimization level
    // matches LLVM recommendations, i.e.:
    // enum Level {
    //   None,        // -O0
    //   Less,        // -O1
    //   Default,     // -O2, -Os
    //   Aggressive   // -O3
    // };
    machineBuilder->setCodeGenOptLevel(static_cast<llvm::CodeGenOpt::Level>(opt_level));
    return machineBuilder->createTargetMachine();
}
void MLIRCompiler::init_mlir() void MLIRCompiler::init_mlir()
{ {
// Mutex to safely initialize MLIR. // Mutex to safely initialize MLIR.
...@@ -131,6 +156,13 @@ void MLIRCompiler::init_mlir() ...@@ -131,6 +156,13 @@ void MLIRCompiler::init_mlir()
NGRAPH_CHECK(mlir_opt_level >= 0 && mlir_opt_level <= 3, "Invalid optimization level"); NGRAPH_CHECK(mlir_opt_level >= 0 && mlir_opt_level <= 3, "Invalid optimization level");
} }
// Initialize LLVM targets and target machine for current host.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
auto expected_target_machine = createDefaultTargetMachine(mlir_opt_level);
NGRAPH_CHECK(expected_target_machine, "Invalid target machine");
target_machine = std::move(*expected_target_machine);
initialized = true; initialized = true;
} }
} }
...@@ -319,54 +351,16 @@ void MLIRCompiler::lower_ng_dialect() ...@@ -319,54 +351,16 @@ void MLIRCompiler::lower_ng_dialect()
dump_mlir_module("LLVM-IR Dialect Dump:"); dump_mlir_module("LLVM-IR Dialect Dump:");
// Initialize LLVM targets.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
// Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we // Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we
// don't run MLIR passes that were already run. We also pass a default transformer created with // don't run MLIR passes that were already run. We also pass a default transformer created with
// the default or user-provided optimization level. // the default or user-provided optimization level.
auto llvm_transformer = auto llvm_transformer =
mlir::makeOptimizingTransformer(mlir_opt_level, /*sizeLevel=*/0, /*targetMachine*/ nullptr); mlir::makeOptimizingTransformer(mlir_opt_level, /*sizeLevel=*/0, target_machine.get());
auto maybeEngine = mlir::ExecutionEngine::create(m_module.get(), llvm_transformer); auto maybeEngine = mlir::ExecutionEngine::create(m_module.get(), llvm_transformer);
NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine"); NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine");
m_engine = std::move(maybeEngine.get()); m_engine = std::move(maybeEngine.get());
} }
/// Creates a target machine for the current host, explicitly configured with the host CPU name
/// and the host CPU sub-target features.
///
/// \param opt_level Optimization level, converted directly to llvm::CodeGenOpt::Level. No range
///                  check is performed here; the value is expected to be in [0, 3].
/// \return The created target machine, or the error produced while detecting the host or while
///         creating the target machine.
static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
createDefaultTargetMachine(unsigned opt_level)
{
    auto machineBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
    if (!machineBuilder)
    {
        // Propagate the host detection failure to the caller.
        return machineBuilder.takeError();
    }

    // Retrieve host CPU sub-target features.
    llvm::SubtargetFeatures subtargetFeatures;
    llvm::StringMap<bool> featureMap;
    llvm::sys::getHostCPUFeatures(featureMap);
    for (const auto& feature : featureMap)
    {
        // Key is the feature name; value tells whether the feature is enabled.
        subtargetFeatures.AddFeature(feature.first(), feature.second);
    }

    // Relocation model and code model are kept to default values. CodeGen optimization level
    // matches LLVM recommendations, i.e.:
    // enum Level {
    //   None,        // -O0
    //   Less,        // -O1
    //   Default,     // -O2, -Os
    //   Aggressive   // -O3
    // };
    machineBuilder->setCPU(llvm::sys::getHostCPUName());
    machineBuilder->setCodeGenOptLevel(static_cast<llvm::CodeGenOpt::Level>(opt_level));
    machineBuilder->addFeatures(subtargetFeatures.getFeatures());
    return machineBuilder->createTargetMachine();
}
/// Returns the cache level size from `targetInfo` for the `cacheLevel` provided. If `userCacheSize` /// Returns the cache level size from `targetInfo` for the `cacheLevel` provided. If `userCacheSize`
/// is not zero, it returns `userCacheSize`. /// is not zero, it returns `userCacheSize`.
static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo, static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
...@@ -400,25 +394,18 @@ static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo, ...@@ -400,25 +394,18 @@ static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
// standard dialect only ops. // standard dialect only ops.
void MLIRCompiler::optimize() void MLIRCompiler::optimize()
{ {
// Create target machine with all the current host features.
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
auto expectedTargetMachine = createDefaultTargetMachine(mlir_opt_level);
NGRAPH_CHECK(expectedTargetMachine, "Invalid target machine");
auto targetMachine = std::move(*expectedTargetMachine);
// Create target transform info to obtain some target information to be used in MLIR // Create target transform info to obtain some target information to be used in MLIR
// optimizations. This is a temporary attempt to retrieve some target information by reusing // optimizations. This is a temporary attempt to retrieve some target information by reusing
// LLVM TTI infra while MLIR does not have target model. // LLVM TTI infra while MLIR does not have target model.
llvm::LLVMContext llvmContext; llvm::LLVMContext llvmContext;
auto module = make_unique<llvm::Module>("test", llvmContext); auto module = make_unique<llvm::Module>("test", llvmContext);
module->setDataLayout(targetMachine->createDataLayout()); module->setDataLayout(target_machine->createDataLayout());
auto ttiSetupFunc = llvm::cast<llvm::Function>( auto ttiSetupFunc = llvm::cast<llvm::Function>(
module module
->getOrInsertFunction("__ngraph_tti_setup", ->getOrInsertFunction("__ngraph_tti_setup",
llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), {})) llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), {}))
.getCallee()); .getCallee());
auto targetInfo = targetMachine->getTargetTransformInfo(*ttiSetupFunc); auto targetInfo = target_machine->getTargetTransformInfo(*ttiSetupFunc);
// Run Affine dialect optimizations. // Run Affine dialect optimizations.
mlir::PassManager pm_opts; mlir::PassManager pm_opts;
......
...@@ -32,6 +32,11 @@ ...@@ -32,6 +32,11 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
// Forward declaration of llvm::TargetMachine; only the name is declared here, not the full type.
namespace llvm
{
class TargetMachine;
}
namespace ngraph namespace ngraph
{ {
namespace descriptor namespace descriptor
...@@ -164,6 +169,16 @@ namespace ngraph ...@@ -164,6 +169,16 @@ namespace ngraph
// Optimization level used by MLIR and LLVM compilers. // Optimization level used by MLIR and LLVM compilers.
static unsigned mlir_opt_level; static unsigned mlir_opt_level;
// LLVM target machine to be used by this MLIR compiler instance to retrieve
// information about target features.
// TODO: Note that, unfortunately, MLIR/OrcJIT execution engine creates its own
// target machine for compilation internally. This target machine is for non-JIT
// related stuff. We should change OrcJIT API so that we can pass an external target
// machine or configuration flags.
// TODO: Move target machine to external nGraph backend when multiple backends start
// to use MLIR.
static std::unique_ptr<llvm::TargetMachine> target_machine;
}; };
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment