fast_math: selectively use GCC rounding builtins when available

Add a new macro definition OPENCV_USE_FASTMATH_GCC_BUILTINS to enable usage of GCC inline math functions, if available and requested by the user. Likewise, enable it for POWER. This is nearly always a substantial improvement over using integer manipulation as most operations can be done in several instructions with no branching. The result is a 1.5-1.8x speedup in the ceil/floor operations, as tested with the AT 12.0-1 (GCC 8.3.1) compiler on P9 LE.

fast_math: selectively use GCC rounding builtins when available
Add a new macro definition OPENCV_USE_FASTMATH_GCC_BUILTINS to enable usage of GCC inline math functions, if available and requested by the user. Likewise, enable it for POWER. This is nearly always a substantial improvement over using integer manipulation as most operations can be done in several instructions with no branching. The result is a 1.5-1.8x speedup in the ceil/floor operations, as tested with the AT 12.0-1 (GCC 8.3.1) compiler on P9 LE.
3f92bcc1 · Paul E. Murphy · b2135be5 · 3f92bcc1
Commit 3f92bcc1 authored Jul 22, 2019 by Paul E. Murphy
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 0 deletions

fast_math.hpp modules/core/include/opencv2/core/fast_math.hpp +29 -0

No files found.
--- a/modules/core/include/opencv2/core/fast_math.hpp
+++ b/modules/core/include/opencv2/core/fast_math.hpp
@@ -92,6 +92,19 @@
    #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
 #endif
+#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS
+    /* Let GCC inline C math functions when available. Dedicated hardware is available to
+       round and convert FP values. */
+    #define OPENCV_USE_FASTMATH_GCC_BUILTINS
+#endif
+/* Enable GCC builtin math functions if possible, desired, and available.
+   Note, not all math functions inline equally. E.g. lrint will not inline
+   without the -fno-math-errno option. */
+#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__)
+    #define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+#endif
 /** @brief Rounds floating-point number to the nearest integer
 @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
@@ -138,8 +151,12 @@ cvRound( double value )
 */
 CV_INLINE int cvFloor( double value )
 {
+#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+    return __builtin_floor(value);
+#else
    int i = (int)value;
    return i - (i > value);
+#endif
 }
 /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
@@ -151,8 +168,12 @@ CV_INLINE int cvFloor( double value )
 */
 CV_INLINE int cvCeil( double value )
 {
+#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+    return __builtin_ceil(value);
+#else
    int i = (int)value;
    return i + (i < value);
+#endif
 }
 /** @brief Determines if the argument is Not A Number.
@@ -225,8 +246,12 @@ CV_INLINE int cvRound( int value )
 /** @overload */
 CV_INLINE int cvFloor( float value )
 {
+#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+    return __builtin_floorf(value);
+#else
    int i = (int)value;
    return i - (i > value);
+#endif
 }
 /** @overload */
@@ -238,8 +263,12 @@ CV_INLINE int cvFloor( int value )
 /** @overload */
 CV_INLINE int cvCeil( float value )
 {
+#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
+    return __builtin_ceilf(value);
+#else
    int i = (int)value;
    return i + (i < value);
+#endif
 }
 /** @overload */