Commit 85f42f42 authored by Jaikrishnan Menon

CPU: Work around Eigen tanh miscompilation for now

This will be revisited if a performant tanh is needed
parent 67109304
...@@ -1620,16 +1620,18 @@ void Emitter::EMITTER_DECL(EmitTanh) ...@@ -1620,16 +1620,18 @@ void Emitter::EMITTER_DECL(EmitTanh)
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type())) (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type(); ->get_element_type();
// Eigen's generic_fast_tanh_float<float> is currently miscompiled by Clang/LLVM,
// so we fall back to std::tanh.
// TODO: Implement our own internal fast/approximate tanh if this actually gets used
// by models.
TU += TU +=
" {\n" " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + " auto& arg0 = call_frame->get_parameterized_tensor_view<" + element_type_names[TI(et)] + ">(" +
to_string(inputs[0].get_index()) + ");\n" to_string(inputs[0].get_index()) + ")->get_vector();\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + " auto& out = call_frame->get_parameterized_tensor_view<" + element_type_names[TI(et)] + ">(" +
to_string(outputs[0].get_index()) + ");\n" to_string(outputs[0].get_index()) + ")->get_vector();\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, " " std::transform(arg0.begin(), arg0.end(), out.begin(), [](" + element_type_names[TI(et)] +
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n" "::type x) -> " + element_type_names[TI(et)] + "::type { return std::tanh(x); });\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").tanh();\n"
" }\n"; " }\n";
} }
......
...@@ -210,6 +210,8 @@ void ExternalFunction::compile(FunctionMap& function_map) ...@@ -210,6 +210,8 @@ void ExternalFunction::compile(FunctionMap& function_map)
TU += R"(// Generated by the NGraph CPU backend TU += R"(// Generated by the NGraph CPU backend
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <algorithm>
#include <cmath>
#include <Eigen/Dense> #include <Eigen/Dense>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment