From b5adffd5c22e9c749b89864a4273bbea99c385ba Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Fri, 24 Jun 2022 14:58:32 +0300 Subject: [PATCH] * cleaned up the cvRound(), cvFloor() and cvCeil() implementations, removed the old non-banker's-rounding (round-half-away-from-zero) fallback branch completely * enabled the use of GCC/clang __builtin_*() functions more broadly --- .../core/include/opencv2/core/fast_math.hpp | 66 +++++++------------ 1 file changed, 24 insertions(+), 42 deletions(-) diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp index eb4fbe213b..cd5de0b546 100644 --- a/modules/core/include/opencv2/core/fast_math.hpp +++ b/modules/core/include/opencv2/core/fast_math.hpp @@ -128,12 +128,8 @@ #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) #endif - #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ - && ( \ - defined(__x86_64__) || defined(__i686__) \ - || defined(__arm__) \ - || defined(__PPC64__) \ - ) + #if !defined(OPENCV_USE_FASTMATH_BUILTINS) && \ + (defined __GNUC__ || defined __clang__ || defined _MSC_VER) /* Let builtin C math functions when available. Dedicated hardware is available to round and convert FP values. 
*/ #define OPENCV_USE_FASTMATH_BUILTINS 1 @@ -201,9 +197,7 @@ cvRound( double value ) { #if defined CV_INLINE_ROUND_DBL CV_INLINE_ROUND_DBL(value); -#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ - && !defined(__CUDACC__) +#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) __m128d t = _mm_set_sd( value ); return _mm_cvtsd_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -214,12 +208,11 @@ cvRound( double value ) fistp t; } return t; -#elif defined CV_ICC || defined __GNUC__ - return (int)(lrint(value)); +#elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_lrint(value); #else - /* it's ok if round does not comply with IEEE754 standard; - the tests should allow +/-1 difference when the tested functions use round */ - return (int)(value + (value >= 0 ? 0.5 : -0.5)); + return (int)lrint(value); #endif } @@ -233,11 +226,9 @@ cvRound( double value ) */ CV_INLINE int cvFloor( double value ) { -#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ - && ( \ - defined(__PPC64__) \ - ) - return __builtin_floor(value); +#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_floor(value); #else int i = (int)value; return i - (i > value); @@ -253,11 +244,9 @@ CV_INLINE int cvFloor( double value ) */ CV_INLINE int cvCeil( double value ) { -#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ - && ( \ - defined(__PPC64__) \ - ) - return __builtin_ceil(value); +#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_ceil(value); #else int i = (int)value; return i + (i < value); @@ -312,9 +301,7 @@ CV_INLINE int cvRound(float value) { #if 
defined CV_INLINE_ROUND_FLT CV_INLINE_ROUND_FLT(value); -#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ - && !defined(__CUDACC__) +#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) __m128 t = _mm_set_ss( value ); return _mm_cvtss_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -325,12 +312,11 @@ CV_INLINE int cvRound(float value) fistp t; } return t; -#elif defined CV_ICC || defined __GNUC__ - return (int)(lrintf(value)); +#elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_lrintf(value); #else - /* it's ok if round does not comply with IEEE754 standard; - the tests should allow +/-1 difference when the tested functions use round */ - return (int)(value + (value >= 0 ? 0.5f : -0.5f)); + return (int)lrintf(value); #endif } @@ -343,11 +329,9 @@ CV_INLINE int cvRound( int value ) /** @overload */ CV_INLINE int cvFloor( float value ) { -#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ - && ( \ - defined(__PPC64__) \ - ) - return __builtin_floorf(value); +#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_floorf(value); #else int i = (int)value; return i - (i > value); @@ -363,11 +347,9 @@ CV_INLINE int cvFloor( int value ) /** @overload */ CV_INLINE int cvCeil( float value ) { -#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ - && ( \ - defined(__PPC64__) \ - ) - return __builtin_ceilf(value); +#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \ + defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + return (int)__builtin_ceilf(value); #else int i = (int)value; return i + (i < value);