Commit 588d69a4 authored by Jai Menon, committed by Robert Kimball

Jmenon/eigen opt (#326)

* CPU: Optimize Eigen based rowwise vector broadcast

* CPU: Remove the need for transposing the broadcast vector

* CPU: Optimize to a replicate expression

* CPU: Change code model to medium and compile for the host CPU
instead of hardcoding Broadwell (BDW)
parent 41cb4a2d
...@@ -16,27 +16,27 @@ include(ExternalProject) ...@@ -16,27 +16,27 @@ include(ExternalProject)
set(EIGEN_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}/eigen) set(EIGEN_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}/eigen)
set(EIGEN_PROJECT eigen) set(EIGEN_PROJECT eigen)
set(EIGEN_SHA1_HASH dd238ca6c6b5d2ce2e7e2e9ded4c59bad77ce6d0) set(EIGEN_GIT_TAG d608d9f3f577118981acbdd40da9dcf6b514668a)
set(EIGEN_URL http://bitbucket.org/eigen/eigen/get/3.3.3.zip) set(EIGEN_GIT_URL https://github.com/jmenon/eigen)
#---------------------------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------------------------
# Download and install GoogleTest ... # Download and install Eigen
#---------------------------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------------------------
# The 'BUILD_BYPRODUCTS' argument was introduced in CMake 3.2. # The 'BUILD_BYPRODUCTS' argument was introduced in CMake 3.2.
if (${CMAKE_VERSION} VERSION_LESS 3.2) if (${CMAKE_VERSION} VERSION_LESS 3.2)
ExternalProject_Add( ExternalProject_Add(
${EIGEN_PROJECT} ${EIGEN_PROJECT}
URL ${EIGEN_URL} GIT_REPOSITORY ${EIGEN_GIT_URL}
URL_HASH SHA1=${EIGEN_SHA1_HASH} GIT_TAG ${EIGEN_GIT_TAG}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EIGEN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EIGEN_INSTALL_DIR}
) )
else() else()
ExternalProject_Add( ExternalProject_Add(
${EIGEN_PROJECT} ${EIGEN_PROJECT}
URL ${EIGEN_URL} GIT_REPOSITORY ${EIGEN_GIT_URL}
URL_HASH SHA1=${EIGEN_SHA1_HASH} GIT_TAG ${EIGEN_GIT_TAG}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EIGEN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EIGEN_INSTALL_DIR}
BUILD_BYPRODUCTS "${EIGEN_INSTALL_DIR}/include/eigen3" BUILD_BYPRODUCTS "${EIGEN_INSTALL_DIR}/include/eigen3"
......
...@@ -111,6 +111,12 @@ StaticCompiler::StaticCompiler() ...@@ -111,6 +111,12 @@ StaticCompiler::StaticCompiler()
vector<const char*> args; vector<const char*> args;
args.push_back(m_source_name.c_str()); args.push_back(m_source_name.c_str());
// Inlining thresholds are forced to a very high value
// to ensure all Eigen code gets properly inlined
// This is for both Eigen strong and weak inlines
args.push_back("-mllvm");
args.push_back("-inline-threshold=1000000");
// Prepare DiagnosticEngine // Prepare DiagnosticEngine
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
TextDiagnosticPrinter* textDiagPrinter = new clang::TextDiagnosticPrinter(errs(), &*DiagOpts); TextDiagnosticPrinter* textDiagPrinter = new clang::TextDiagnosticPrinter(errs(), &*DiagOpts);
...@@ -203,6 +209,7 @@ StaticCompiler::StaticCompiler() ...@@ -203,6 +209,7 @@ StaticCompiler::StaticCompiler()
auto& CGO = m_compiler->getInvocation().getCodeGenOpts(); auto& CGO = m_compiler->getInvocation().getCodeGenOpts();
CGO.OptimizationLevel = 3; CGO.OptimizationLevel = 3;
CGO.RelocationModel = "static"; CGO.RelocationModel = "static";
CGO.CodeModel = "medium";
CGO.ThreadModel = "posix"; CGO.ThreadModel = "posix";
CGO.FloatABI = "hard"; CGO.FloatABI = "hard";
CGO.OmitLeafFramePointer = 1; CGO.OmitLeafFramePointer = 1;
...@@ -218,8 +225,8 @@ StaticCompiler::StaticCompiler() ...@@ -218,8 +225,8 @@ StaticCompiler::StaticCompiler()
// Enable various target features // Enable various target features
// Most of these are for Eigen // Most of these are for Eigen
auto& TO = m_compiler->getInvocation().getTargetOpts(); auto& TO = m_compiler->getInvocation().getTargetOpts();
// TODO: This needs to be configurable and selected carefully
TO.CPU = "broadwell"; TO.CPU = sys::getHostCPUName();
TO.FeaturesAsWritten.emplace_back("+sse"); TO.FeaturesAsWritten.emplace_back("+sse");
TO.FeaturesAsWritten.emplace_back("+sse2"); TO.FeaturesAsWritten.emplace_back("+sse2");
TO.FeaturesAsWritten.emplace_back("+sse3"); TO.FeaturesAsWritten.emplace_back("+sse3");
......
...@@ -41,6 +41,8 @@ bool codegen::ExecutionEngine::add_module(std::unique_ptr<llvm::Module>& module) ...@@ -41,6 +41,8 @@ bool codegen::ExecutionEngine::add_module(std::unique_ptr<llvm::Module>& module)
m_execution_engine.reset(llvm::EngineBuilder(move(module)) m_execution_engine.reset(llvm::EngineBuilder(move(module))
.setEngineKind(llvm::EngineKind::JIT) .setEngineKind(llvm::EngineKind::JIT)
.setOptLevel(llvm::CodeGenOpt::Aggressive) .setOptLevel(llvm::CodeGenOpt::Aggressive)
.setMCPU(llvm::sys::getHostCPUName())
.setCodeModel(llvm::CodeModel::Medium)
.setErrorStr(&m_jit_error) .setErrorStr(&m_jit_error)
.create()); .create());
......
...@@ -523,8 +523,17 @@ void runtime::cpu::CPU_Emitter::EmitBroadcast(const ngraph::Node* n, ...@@ -523,8 +523,17 @@ void runtime::cpu::CPU_Emitter::EmitBroadcast(const ngraph::Node* n,
{ {
m_out << "{ // " << n->get_name() << "\n"; m_out << "{ // " << n->get_name() << "\n";
m_out.indent++; m_out.indent++;
m_out << emit_matrix(out[0]) << ".rowwise() =\n"
<< " " << emit_vector(args[0]) << ".transpose();\n"; m_out << "Eigen::Map<Eigen::Matrix<" << out[0].get_element_type().c_type_string()
<< ", " << join(out[0].get_shape())
<< ", Eigen::RowMajor>, Eigen::Aligned64, Eigen::Stride<"
<< join(out[0].get_strides()) << ">> out(" << out[0].get_name() << ");\n";
m_out << "Eigen::Map<Eigen::Matrix<" << args[0].get_element_type().c_type_string()
<< ", 1, " << args[0].get_size()
<< ", Eigen::RowMajor>, Eigen::Aligned64, Eigen::Stride<" << args[0].get_size()
<< ", 1>> arg0(" << args[0].get_name() << ");\n";
m_out << "out = arg0.replicate<" << out[0].get_shape().at(0) << ", 1>();\n";
m_out.indent--; m_out.indent--;
m_out << "}\n"; m_out << "}\n";
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment