Use SSE2 intrinsics for `cvRound` on x86 and x86_64 with `__GNUC__`-compatible compilers (Clang/GCC on Linux) too.

We've measured a 7x improvement in speed for `cvRound` using the intrinsic.
pull/24001/head
Clement Courbet 1 year ago committed by Alexander Smorkalov
parent 0052d46b8e
commit 3cce299a78
  1. 4
      modules/core/include/opencv2/core/fast_math.hpp

@@ -201,7 +201,7 @@ cvRound( double value )
{ {
#if defined CV_INLINE_ROUND_DBL #if defined CV_INLINE_ROUND_DBL
CV_INLINE_ROUND_DBL(value); CV_INLINE_ROUND_DBL(value);
#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value ); __m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t); return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
@@ -323,7 +323,7 @@ CV_INLINE int cvRound(float value)
{ {
#if defined CV_INLINE_ROUND_FLT #if defined CV_INLINE_ROUND_FLT
CV_INLINE_ROUND_FLT(value); CV_INLINE_ROUND_FLT(value);
#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value ); __m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t); return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86

Loading…
Cancel
Save