Commit 79283e3e authored by Diego Caballero, committed by Scott Cyphers

[MLIR] Enable LLVM vectorization by initializing TTI (#3424)

* [MLIR] Bump MLIR repo to commit c636f12, 08/09/2019

MLIR Commit:
commit c636f127ee412ef7279ec0d550f42740824cd9ea
Author: Alex Zinenko <zinenko@google.com>
Date:   Fri Aug 9 08:59:45 2019 -0700

    LLVM dialect and translation: support global strings

* [MLIR] Set optimization level for LLVM optimizer and codegen

Now both the LLVM optimizer and the code generator honor the optimization
level selected via the "NGRAPH_MLIR_OPT_LEVEL" macro; a rough sketch of the
mapping follows below.

* [MLIR] Enable LLVM vectorization by initializing TTI

This is the final piece needed to enable LLVM vectorization for the MLIR compiler.
The PR refactors the creation of the target machine in MLIRCompiler so that it can
be used to initialize TargetTransformInfo with the proper host features, letting
the LLVM Loop Vectorizer obtain the right vector register information for the
target CPU (see the sketch below).
parent 83611106
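
For illustration only, here is a minimal standalone sketch (not part of the diff below, and assuming the LLVM 9-era C++ API this commit builds against) of how a host target machine can seed TargetTransformInfo so that vector register information becomes visible:

    #include <llvm/Analysis/TargetTransformInfo.h>
    #include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
    #include <llvm/IR/DerivedTypes.h>
    #include <llvm/IR/Function.h>
    #include <llvm/IR/LLVMContext.h>
    #include <llvm/IR/Module.h>
    #include <llvm/Support/Casting.h>
    #include <llvm/Support/Error.h>
    #include <llvm/Support/TargetSelect.h>

    int main()
    {
        // Register the native target so detectHost() can build a machine for it.
        llvm::InitializeNativeTarget();
        llvm::InitializeNativeTargetAsmPrinter();

        auto builder = llvm::orc::JITTargetMachineBuilder::detectHost();
        if (!builder)
        {
            llvm::consumeError(builder.takeError());
            return 1;
        }
        auto tm = builder->createTargetMachine();
        if (!tm)
        {
            llvm::consumeError(tm.takeError());
            return 1;
        }

        // TTI is built per function, so materialize a dummy function in a
        // throwaway module that uses the target's data layout.
        llvm::LLVMContext ctx;
        llvm::Module module("tti_probe", ctx);
        module.setDataLayout((*tm)->createDataLayout());
        auto* fn = llvm::cast<llvm::Function>(
            module.getOrInsertFunction(
                      "probe", llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), false))
                .getCallee());

        // Width of the widest vector register, e.g. 256 for AVX2 or 512 for AVX-512.
        llvm::TargetTransformInfo tti = (*tm)->getTargetTransformInfo(*fn);
        unsigned vector_bits = tti.getRegisterBitWidth(/*Vector=*/true);
        return vector_bits > 0 ? 0 : 1;
    }

The error handling here simply bails out on failure; the diff below wraps the equivalent calls behind NGRAPH_CHECK instead.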
@@ -110,6 +110,31 @@ static llvm::cl::opt<unsigned> clLoopTilingCacheSize(
// Default optimization level.
unsigned MLIRCompiler::mlir_opt_level = 2;
+ // Target machine will be properly initialized by `init_mlir`.
+ std::unique_ptr<llvm::TargetMachine> MLIRCompiler::target_machine;
+ /// Creates target machine for current host.
+ static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
+ createDefaultTargetMachine(unsigned opt_level)
+ {
+ auto machineBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
+ if (!machineBuilder)
+ {
+ return machineBuilder.takeError();
+ }
+ // Relocation model and code model are kept to default values. CodeGen optimization level
+ // matches LLVM recommendations, i.e.:
+ // enum Level {
+ // None, // -O0
+ // Less, // -O1
+ // Default, // -O2, -Os
+ // Aggressive // -O3
+ // };
+ machineBuilder->setCodeGenOptLevel((llvm::CodeGenOpt::Level)opt_level);
+ return machineBuilder->createTargetMachine();
+ }
void MLIRCompiler::init_mlir()
{
// Mutex to safely initialize MLIR.
@@ -131,6 +156,13 @@ void MLIRCompiler::init_mlir()
NGRAPH_CHECK(mlir_opt_level >= 0 && mlir_opt_level <= 3, "Invalid optimization level");
}
+ // Initialize LLVM targets and target machine for current host.
+ llvm::InitializeNativeTarget();
+ llvm::InitializeNativeTargetAsmPrinter();
+ auto expected_target_machine = createDefaultTargetMachine(mlir_opt_level);
+ NGRAPH_CHECK(expected_target_machine, "Invalid target machine");
+ target_machine = std::move(*expected_target_machine);
initialized = true;
}
}
@@ -319,54 +351,16 @@ void MLIRCompiler::lower_ng_dialect()
dump_mlir_module("LLVM-IR Dialect Dump:");
- // Initialize LLVM targets.
- llvm::InitializeNativeTarget();
- llvm::InitializeNativeTargetAsmPrinter();
// Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we
// don't run MLIR passes that were already run. We also pass a default transformer created with
// the default or user-provided optimization level.
auto llvm_transformer =
- mlir::makeOptimizingTransformer(mlir_opt_level, /*sizeLevel=*/0, /*targetMachine*/ nullptr);
+ mlir::makeOptimizingTransformer(mlir_opt_level, /*sizeLevel=*/0, target_machine.get());
auto maybeEngine = mlir::ExecutionEngine::create(m_module.get(), llvm_transformer);
NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine");
m_engine = std::move(maybeEngine.get());
}
- /// Creates target machine for current host.
- static llvm::Expected<std::unique_ptr<llvm::TargetMachine>>
- createDefaultTargetMachine(unsigned opt_level)
- {
- auto machineBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
- if (!machineBuilder)
- {
- return machineBuilder.takeError();
- }
- // Retrieve host CPU sub-target features.
- llvm::SubtargetFeatures subtargetFeatures;
- llvm::StringMap<bool> featureMap;
- llvm::sys::getHostCPUFeatures(featureMap);
- for (auto& feature : featureMap)
- {
- subtargetFeatures.AddFeature(feature.first(), feature.second);
- }
- // Relocation model and code model are kept to default values. CodeGen optimization level
- // matches LLVM recommendations, i.e.:
- // enum Level {
- // None, // -O0
- // Less, // -O1
- // Default, // -O2, -Os
- // Aggressive // -O3
- // };
- machineBuilder->setCPU(llvm::sys::getHostCPUName());
- machineBuilder->setCodeGenOptLevel((llvm::CodeGenOpt::Level)opt_level);
- machineBuilder->addFeatures(subtargetFeatures.getFeatures());
- return machineBuilder->createTargetMachine();
- }
/// Returns the cache level size from `targetInfo` for the `cacheLevel` provided. If `userCacheSize`
/// is not zero, it returns `userCacheSize`.
static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
@@ -400,25 +394,18 @@ static unsigned getCacheLevelSize(llvm::TargetTransformInfo& targetInfo,
// standard dialect only ops.
void MLIRCompiler::optimize()
{
- // Create target machine with all the current host features.
- llvm::InitializeNativeTarget();
- llvm::InitializeNativeTargetAsmPrinter();
- auto expectedTargetMachine = createDefaultTargetMachine(mlir_opt_level);
- NGRAPH_CHECK(expectedTargetMachine, "Invalid target machine");
- auto targetMachine = std::move(*expectedTargetMachine);
// Create target transform info to obtain some target information to be used in MLIR
// optimizations. This is a temporary attempt to retrieve some target information by reusing
// LLVM TTI infra while MLIR does not have target model.
llvm::LLVMContext llvmContext;
auto module = make_unique<llvm::Module>("test", llvmContext);
- module->setDataLayout(targetMachine->createDataLayout());
+ module->setDataLayout(target_machine->createDataLayout());
auto ttiSetupFunc = llvm::cast<llvm::Function>(
module
->getOrInsertFunction("__ngraph_tti_setup",
llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), {}))
.getCallee());
- auto targetInfo = targetMachine->getTargetTransformInfo(*ttiSetupFunc);
+ auto targetInfo = target_machine->getTargetTransformInfo(*ttiSetupFunc);
// Run Affine dialect optimizations.
mlir::PassManager pm_opts;
@@ -32,6 +32,11 @@
#include <unordered_map>
#include <vector>
+ namespace llvm
+ {
+ class TargetMachine;
+ }
namespace ngraph
{
namespace descriptor
@@ -164,6 +169,16 @@ namespace ngraph
// Optimization level used by MLIR and LLVM compilers.
static unsigned mlir_opt_level;
+ // LLVM target machine to be used by this MLIR compiler instance to retrieve
+ // information about target features.
+ // TODO: Note that, unfortunately, the MLIR/OrcJIT execution engine creates its own
+ // target machine for compilation internally. This target machine is for non-JIT
+ // related work. We should change the OrcJIT API so that we can pass an external target
+ // machine or configuration flags.
+ // TODO: Move the target machine to the external nGraph backend when multiple backends
+ // start to use MLIR.
+ static std::unique_ptr<llvm::TargetMachine> target_machine;
};
}
}