Open Source Computer Vision Library
https://opencv.org/
134 lines
3.3 KiB
134 lines
3.3 KiB
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
|
|
|
#include "opencv2/core/hal/intrin.hpp" |
|
|
|
namespace cv { namespace hal { |
|
|
|
extern const uchar popCountTable[256]; |
|
|
|
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
|
|
|
// Forward declarations of the Hamming-norm kernels. Their bodies follow in
// this file unless CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY is defined.
|
// Returns the number of set bits in the n bytes starting at `a`.
int normHamming(const uchar* a, int n); |
|
// Returns the number of set bits in the byte-wise XOR of the n-byte buffers
// `a` and `b`, i.e. the count of bit positions where the two buffers differ.
int normHamming(const uchar* a, const uchar* b, int n); |
|
|
|
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
|
|
|
#if CV_AVX2 |
|
// Returns element `i` (0 <= i < 8) of `reg`, viewed as eight 32-bit ints.
// The register is written to a temporary array declared with
// CV_DECL_ALIGNED(32) and the requested slot is read back, so `i` may be a
// run-time value. The index range is checked with CV_DbgAssert.
static inline int _mm256_extract_epi32_(__m256i reg, const int i)
{
    CV_DbgAssert(0 <= i && i < 8);

    CV_DECL_ALIGNED(32) int lanes[8];
    _mm256_store_si256(reinterpret_cast<__m256i*>(lanes), reg);
    return lanes[i];
}
|
#endif |
|
|
|
int normHamming(const uchar* a, int n) |
|
{ |
|
CV_AVX_GUARD; |
|
|
|
int i = 0; |
|
int result = 0; |
|
|
|
#if CV_SIMD && CV_SIMD_WIDTH > 16 |
|
{ |
|
v_uint64 t = vx_setzero_u64(); |
|
for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) |
|
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); |
|
result = (int)v_reduce_sum(t); |
|
} |
|
#endif |
|
|
|
#if CV_POPCNT |
|
{ |
|
# if defined CV_POPCNT_U64 |
|
for(; i <= n - 8; i += 8) |
|
{ |
|
result += (int)CV_POPCNT_U64(*(uint64*)(a + i)); |
|
} |
|
# endif |
|
for(; i <= n - 4; i += 4) |
|
{ |
|
result += CV_POPCNT_U32(*(uint*)(a + i)); |
|
} |
|
} |
|
#elif CV_SIMD |
|
{ |
|
v_uint64x2 t = v_setzero_u64(); |
|
for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) |
|
t += v_popcount(v_reinterpret_as_u64(v_load(a + i))); |
|
result += (int)v_reduce_sum(t); |
|
} |
|
#endif |
|
#if CV_ENABLE_UNROLLED |
|
for(; i <= n - 4; i += 4) |
|
{ |
|
result += popCountTable[a[i]] + popCountTable[a[i+1]] + |
|
popCountTable[a[i+2]] + popCountTable[a[i+3]]; |
|
} |
|
#endif |
|
for(; i < n; i++) |
|
{ |
|
result += popCountTable[a[i]]; |
|
} |
|
return result; |
|
} |
|
|
|
int normHamming(const uchar* a, const uchar* b, int n) |
|
{ |
|
CV_AVX_GUARD; |
|
|
|
int i = 0; |
|
int result = 0; |
|
|
|
#if CV_SIMD && CV_SIMD_WIDTH > 16 |
|
{ |
|
v_uint64 t = vx_setzero_u64(); |
|
for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) |
|
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); |
|
result += (int)v_reduce_sum(t); |
|
} |
|
#endif |
|
|
|
#if CV_POPCNT |
|
{ |
|
# if defined CV_POPCNT_U64 |
|
for(; i <= n - 8; i += 8) |
|
{ |
|
result += (int)CV_POPCNT_U64(*(uint64*)(a + i) ^ *(uint64*)(b + i)); |
|
} |
|
# endif |
|
for(; i <= n - 4; i += 4) |
|
{ |
|
result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); |
|
} |
|
} |
|
#elif CV_SIMD |
|
{ |
|
v_uint64x2 t = v_setzero_u64(); |
|
for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) |
|
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); |
|
result += (int)v_reduce_sum(t); |
|
} |
|
#endif |
|
#if CV_ENABLE_UNROLLED |
|
for(; i <= n - 4; i += 4) |
|
{ |
|
result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + |
|
popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; |
|
} |
|
#endif |
|
for(; i < n; i++) |
|
{ |
|
result += popCountTable[a[i] ^ b[i]]; |
|
} |
|
return result; |
|
} |
|
|
|
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
|
|
|
CV_CPU_OPTIMIZATION_NAMESPACE_END |
|
}} //cv::hal
|
|
|