Updated v_popcount description, reference implementation and test.

pull/14232/head
Vitaly Tuzov 6 years ago
parent 96ab78dc4f
commit 1220dd4877
  1. modules/core/include/opencv2/core/hal/intrin_cpp.hpp (25 lines changed)
  2. modules/core/src/stat.simd.hpp (4 lines changed)
  3. modules/core/test/test_intrin_utils.hpp (20 lines changed)

@@ -603,27 +603,20 @@ static const unsigned char popCountTable[] =
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
}; };
/** @brief Count the 1 bits in the vector and return 4 values /** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Scheme: Scheme:
@code @code
{A1 A2 A3 ...} => popcount(A1) {A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
@endcode @endcode
Any types but result will be in v_uint32x4*/ For all integer types. */
template<typename _Tp, int n> inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a) template<typename _Tp, int n>
{ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
v_uint8x16 b;
b = v_reinterpret_as_u8(a);
for( int i = 0; i < v_uint8x16::nlanes; i++ )
{
b.s[i] = popCountTable[b.s[i]];
}
v_uint32x4 c;
for( int i = 0; i < v_uint32x4::nlanes; i++ )
{ {
c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3]; v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
} for( int i = 0; i < n*sizeof(_Tp); i++ )
return c; b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
return b;
} }

@@ -75,7 +75,7 @@ int normHamming(const uchar* a, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED
@@ -144,7 +144,7 @@ int normHamming(const uchar* a, const uchar* b, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED

@@ -686,18 +686,24 @@ template<typename R> struct TheTest
TheTest & test_popcount() TheTest & test_popcount()
{ {
typedef typename V_RegTraits<R>::u_reg Ru;
static unsigned popcountTable[] = { static unsigned popcountTable[] = {
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f
176, 181, 186, 192, 193 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x50-0x5f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f
1 //0x80
}; };
Data<R> dataA; Data<R> dataA;
R a = dataA; R a = dataA;
unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); Data<Ru> resB = v_popcount(a);
EXPECT_EQ(popcountTable[R::nlanes], resB); for (int i = 0; i < Ru::nlanes; ++i)
EXPECT_EQ(popcountTable[i + 1], resB[i]);
return *this; return *this;
} }

Loading…
Cancel
Save