Fixed v_reduce_sad intrinsics implementation and added tests

pull/14385/head
Vitaly Tuzov 6 years ago
parent 5c0a98cfb6
commit 18d10d6b86
  1. 8
      modules/core/include/opencv2/core/hal/intrin_avx.hpp
  2. 7
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  3. 16
      modules/core/test/test_intrin_utils.hpp

@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
{
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val));
__m256i half = _mm256_sad_epu8(a.val, b.val);
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
{
__m256i half = _mm256_set1_epi8(0x7f);
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)));
half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half));
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
{

@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val));
__m128i half = _mm_sad_epu8(a.val, b.val);
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
__m128i half = _mm_set1_epi8(0x7f);
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half),
_mm_add_epi8(b.val, half)));
half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{

@ -770,6 +770,15 @@ template<typename R> struct TheTest
return *this;
}
TheTest & test_reduce_sad()
{
Data<R> dataA, dataB(R::nlanes/2);
R a = dataA;
R b = dataB;
EXPECT_EQ((unsigned)(R::nlanes*R::nlanes/4), v_reduce_sad(a, b));
return *this;
}
TheTest & test_mask()
{
typedef typename V_RegTraits<R>::int_reg int_reg;
@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
.test_absdiffs()
.test_abs()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
.test_min_max()
.test_float_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_unpack()
.test_float_math()

Loading…
Cancel
Save