diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
index da167e3401..e7370504ef 100644
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@@ -1220,14 +1220,30 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
 
-#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
+// Equality operators for 64-bit integer vectors. The lanes are compared as
+// integers; operator != is the bitwise complement of operator ==, so it relies
+// on operator ~ being available for _Tpvec.
+#if CV_SSE4_1
+// SSE4.1 offers a native 64-bit equality compare.
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
-{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
+{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
-{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
+{ return ~(a == b); }
+#else
+// SSE2 fallback: compare the 32-bit halves, then AND every half with its
+// neighbour (the shuffle swaps the two halves inside each 64-bit lane), so a
+// lane stays set only when both of its halves matched.
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \
+  return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return ~(a == b); }
+#endif
 
-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)
 
 inline v_float32x4 v_not_nan(const v_float32x4& a)
 { return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 2226502591..bd1e24722c 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1442,6 +1442,75 @@ template struct TheTest
         return *this;
     }
 #endif
+
+#if CV_SIMD_64F
+    // Verifies operator == / != on 64-bit integer lanes by comparing every
+    // result lane with the scalar comparison of the corresponding inputs.
+    TheTest & test_cmp64()
+    {
+        Data<R> dataA, dataB;
+
+        // Pass 1: identical inputs except for lane 0.
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            dataA[i] = dataB[i];
+        }
+        dataA[0]++;
+
+        R a = dataA, b = dataB;
+
+        Data<R> resC = (a == b);
+        Data<R> resD = (a != b);
+
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
+            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
+        }
+
+        // Pass 2: every lane equal, with all bits set.
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            dataA[i] = dataB[i] = (LaneType)-1;
+        }
+
+        a = dataA;
+        b = dataB;
+
+        resC = (a == b);
+        resD = (a != b);
+
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
+            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
+        }
+
+        // Pass 3: lanes differ only in the upper 32 bits, so a comparison
+        // that ignores the upper half of a lane would be caught.
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            dataB[i] = (LaneType)(i + 1);
+            dataA[i] = dataB[i] ^ ((LaneType)1 << 32);
+        }
+
+        a = dataA;
+        b = dataB;
+
+        resC = (a == b);
+        resD = (a != b);
+
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
+            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
+        }
+        return *this;
+    }
+#endif
 
 };
 
@@ -1657,6 +1726,9 @@ void test_hal_intrin_uint64()
     TheTest()
         .test_loadstore()
         .test_addsub()
+#if CV_SIMD_64F
+        .test_cmp64()
+#endif
         .test_shift<1>().test_shift<8>()
         .test_logic()
         .test_reverse()
@@ -1671,6 +1743,9 @@ void test_hal_intrin_int64()
     TheTest()
         .test_loadstore()
         .test_addsub()
+#if CV_SIMD_64F
+        .test_cmp64()
+#endif
         .test_shift<1>().test_shift<8>()
         .test_logic()
         .test_reverse()