Commit 31eb5c46 authored by Jaikrishnan Menon

CPU: Use transpose kernel from MKL in 2D Reshape

Also, add more codegen options
parent 0df3792f
@@ -143,10 +143,17 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
     LO->OpenMP = 1;
     LO->OpenMPUseTLS = 1;
-    if (debuginfo_enabled)
-    {
     // CodeGen options
     auto& CGO = Clang->getInvocation().getCodeGenOpts();
+    CGO.OptimizationLevel = 3;
+    CGO.RelocationModel = "static";
+    CGO.ThreadModel = "posix";
+    CGO.OmitLeafFramePointer = 1;
+    CGO.VectorizeLoop = 1;
+    CGO.VectorizeSLP = 1;
+    if (debuginfo_enabled)
+    {
         CGO.setDebugInfo(codegenoptions::FullDebugInfo);
     }
@@ -161,6 +168,8 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
     // Enable various target features
     // Most of these are for Eigen
     auto &TO = Clang->getInvocation().getTargetOpts();
+    // TODO: This needs to be configurable and selected carefully
+    TO.CPU = "broadwell";
     TO.FeaturesAsWritten.emplace_back("+sse4.1");
     TO.FeaturesAsWritten.emplace_back("+sse4.2");
     TO.FeaturesAsWritten.emplace_back("+avx");
...
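A note on the TODO in the hunk above: one possible way to stop hard-coding "broadwell" (not part of this commit, just a sketch) is to ask LLVM for the host CPU and feed that into the target options. detect_host_cpu is a hypothetical helper name.

#include <llvm/Support/Host.h>
#include <string>

// Hypothetical helper: query LLVM for the CPU we are running on so codegen
// can follow the host instead of a fixed "broadwell".
static std::string detect_host_cpu()
{
    // Returns names such as "broadwell" or "skylake", or "generic" when
    // LLVM cannot identify the host.
    return llvm::sys::getHostCPUName().str();
}

// Possible use at the site shown above (an assumption, not in this commit):
//     TO.CPU = detect_host_cpu();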
@@ -86,3 +86,15 @@ namespace cblas
                                 const ngraph::element::Int64::type ldc);
     }
 }
+namespace mkl
+{
+    extern "C" {
+        void MKL_Somatcopy(char ordering,
+                           char trans,
+                           size_t rows, size_t cols,
+                           const ngraph::element::Float32::type alpha,
+                           const ngraph::element::Float32::type* A, size_t lda,
+                           ngraph::element::Float32::type* B, size_t ldb);
+    }
+}
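For context, MKL_Somatcopy is MKL's out-of-place scaled copy/transpose for single-precision matrices; the extern "C" declaration above simply avoids pulling in the MKL header here. Below is a minimal standalone sketch of the same call pattern the emitter generates ('R' = row-major, 'T' = transpose, lda = source columns, ldb = source rows), using plain float and made-up sizes.

#include <mkl_trans.h> // MKL header declaring mkl_somatcopy / MKL_Somatcopy
#include <cstdio>
#include <vector>

int main()
{
    // 2x3 row-major source, transposed into a 3x2 row-major destination.
    std::vector<float> a = {1, 2, 3,
                            4, 5, 6};
    std::vector<float> b(6, 0.0f);

    // rows/cols describe the source; alpha scales every element (1.0f = plain copy).
    mkl_somatcopy('R', 'T', 2, 3, 1.0f, a.data(), 3, b.data(), 2);

    // Prints the 3x2 result: 1 4 / 2 5 / 3 6
    for (size_t i = 0; i < 3; ++i)
        std::printf("%g %g\n", b[2 * i], b[2 * i + 1]);
    return 0;
}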
@@ -979,7 +979,25 @@ void Emitter::EMITTER_DECL(EmitReshape)
         auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();
         auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();
-        TU += "    {\n"
+        // Emit an MKL transpose call if possible
+        if (result_element_type == ngraph::element::Float32::element_type())
+        {
+            TU +=
+                "    {\n"
+                "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
+                ">(" + to_string(inputs[0].get_index()) + ");\n"
+                "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
+                ">(" + to_string(outputs[0].get_index()) + ");\n"
+                "        mkl::MKL_Somatcopy('R', 'T', " + to_string(arg_shape[0]) + ",\n"
+                "                           " + to_string(arg_shape[1]) + ", 1.0f,\n"
+                "                           arg0, " + to_string(arg_shape[1]) + ",\n"
+                "                           out, " + to_string(arg_shape[0]) + ");\n"
+                "    }\n";
+        }
+        else
+        {
+            TU +=
+                "    {\n"
                 "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
                 ">(" + to_string(inputs[0].get_index()) + ");\n"
                 "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
@@ -990,6 +1008,7 @@ void Emitter::EMITTER_DECL(EmitReshape)
                 EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").transpose();\n"
                 "    }\n";
         }
+    }
     // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
     else
     {
...
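To make the float32 branch above concrete, this is roughly the text it appends to TU for a hypothetical 2x3 Float32 reshape whose input and output tensor views sit at indices 0 and 1, assuming element_type_names[TI(...)] expands to ngraph::element::Float32. It is emitted source that the execution_state::compile path touched in the first file compiles later, not a standalone program.

    {
        auto arg0 = call_frame->get_tensor_view_data<ngraph::element::Float32>(0);
        auto out = call_frame->get_tensor_view_data<ngraph::element::Float32>(1);
        mkl::MKL_Somatcopy('R', 'T', 2,
                           3, 1.0f,
                           arg0, 3,
                           out, 2);
    }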