Merge pull request #15738 from ChipKerchner:bugInt64x2Comparison

Fixing bug with comparison of v_int64x2 or v_uint64x2

* Casting v_uint64x2 to v_float64x2 and comparing does NOT work in all cases.  Rewrite using epi64 instructions - faster too.

* Fix bad merge.

* Fix equal comparsion for non-SSE4.1. Add test cases for v_int64x2 comparisons.

* Try to fix merge conflict.

* Only test v_int64x2 comparisons if CV_SIMD_64F

* Fix compiler warning.
pull/15754/head
Chip Kerchner 5 years ago committed by Alexander Alekhin
parent 1864b64f64
commit 5a6a49405d
  1. 19
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  2. 50
      modules/core/test/test_intrin_utils.hpp

@ -1220,14 +1220,23 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
#if CV_SSE4_1
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
{ return ~(a == b); }
#else
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \
return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
#endif
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)
inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }

@ -1442,6 +1442,50 @@ template<typename R> struct TheTest
return *this;
}
#endif
#if CV_SIMD_64F
TheTest & test_cmp64()
{
Data<R> dataA, dataB;
R a = dataA, b = dataB;
for (int i = 0; i < R::nlanes; ++i)
{
dataA[i] = dataB[i];
}
dataA[0]++;
a = dataA, b = dataB;
Data<R> resC = (a == b);
Data<R> resD = (a != b);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
}
for (int i = 0; i < R::nlanes; ++i)
{
dataA[i] = dataB[i] = (LaneType)-1;
}
a = dataA, b = dataB;
resC = (a == b);
resD = (a != b);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
}
return *this;
}
#endif
};
@ -1657,6 +1701,9 @@ void test_hal_intrin_uint64()
TheTest<v_uint64>()
.test_loadstore()
.test_addsub()
#if CV_SIMD_64F
.test_cmp64()
#endif
.test_shift<1>().test_shift<8>()
.test_logic()
.test_reverse()
@ -1671,6 +1718,9 @@ void test_hal_intrin_int64()
TheTest<v_int64>()
.test_loadstore()
.test_addsub()
#if CV_SIMD_64F
.test_cmp64()
#endif
.test_shift<1>().test_shift<8>()
.test_logic()
.test_reverse()

Loading…
Cancel
Save