|
|
|
@ -71,33 +71,759 @@ minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal, |
|
|
|
|
*_maxVal = maxVal; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
// Prepares the running state for the SIMD min/max-with-index search.
// Outputs: scalar running extrema (minVal/maxVal), their global positions
// (minIdx/maxIdx), the loop start `j` and the length `len0` of the span that
// can be processed with full vectors (a multiple of `nlanes`).
// [minInit, maxInit] is the representable range of the element type T; if the
// caller-supplied start values fall outside it, they cannot be encoded into a
// vector of T, so a scalar pre-pass runs first to pull both extrema back into
// range before vectorization starts.
template<typename T, typename WT> CV_ALWAYS_INLINE void
minMaxIdx_init( const T* src, const uchar* mask, WT* minval, WT* maxval,
                size_t* minidx, size_t* maxidx, WT &minVal, WT &maxVal,
                size_t &minIdx, size_t &maxIdx, const WT minInit, const WT maxInit,
                const int nlanes, int len, size_t startidx, int &j, int &len0 )
{
    // Largest multiple of nlanes not exceeding len: the vectorizable span.
    len0 = len & -nlanes;
    j = 0;

    minVal = *minval, maxVal = *maxval;
    minIdx = *minidx, maxIdx = *maxidx;

    // To handle start values out of range: run a scalar prologue until both
    // extrema have been replaced by in-range values from src (or input ends).
    if ( minVal < minInit || maxVal < minInit || minVal > maxInit || maxVal > maxInit )
    {
        // bit 0: minVal updated, bit 1: maxVal updated
        uchar done = 0x00;

        for ( ; (j < len) && (done != 0x03); j++ )
        {
            if ( !mask || mask[j] ) {
                T val = src[j];
                if ( val < minVal )
                {
                    minVal = val;
                    minIdx = startidx + j;
                    done |= 0x01;
                }
                if ( val > maxVal )
                {
                    maxVal = val;
                    maxIdx = startidx + j;
                    done |= 0x02;
                }
            }
        }

        // Recompute the vectorizable span relative to the new start position j.
        len0 = j + ((len - j) & -nlanes);
    }
}
|
|
|
|
|
|
|
|
|
#if CV_SIMD128_64F |
|
|
|
|
// Horizontal minimum of a 2-lane double vector.
CV_ALWAYS_INLINE double v_reduce_min(const v_float64x2& a)
{
    double CV_DECL_ALIGNED(32) lanes[2];
    v_store_aligned(lanes, a);
    return std::min(lanes[0], lanes[1]);
}
|
|
|
|
|
|
|
|
|
// Horizontal maximum of a 2-lane double vector.
CV_ALWAYS_INLINE double v_reduce_max(const v_float64x2& a)
{
    double CV_DECL_ALIGNED(32) lanes[2];
    v_store_aligned(lanes, a);
    return std::max(lanes[0], lanes[1]);
}
|
|
|
|
|
|
|
|
|
// Horizontal minimum of a 2-lane 64-bit unsigned vector.
CV_ALWAYS_INLINE uint64_t v_reduce_min(const v_uint64x2& a)
{
    uint64_t CV_DECL_ALIGNED(32) lanes[2];
    v_store_aligned(lanes, a);
    return std::min(lanes[0], lanes[1]);
}
|
|
|
|
|
|
|
|
|
// Bitwise blend: bits of `a` where `mask` bits are set, bits of `b` elsewhere.
CV_ALWAYS_INLINE v_uint64x2 v_select(const v_uint64x2& mask, const v_uint64x2& a, const v_uint64x2& b)
{
    return (a & mask) | (b & ~mask);
}
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Generates minMaxIdx_reduce_<suffix>(): folds the per-lane vector extrema
// (valMin/valMax) and their lane-index vectors (idxMin/idxMax) into the scalar
// running minVal/maxVal/minIdx/maxIdx.
// `none` (all-ones) marks lanes that were never updated; the v_check_any test
// skips the reduction entirely when no lane improved on the running extremum.
// Lanes not holding the reduced extremum are replaced by the `maxLimit`
// sentinel before the index reduction, so v_reduce_min picks the smallest lane
// index among the lanes equal to the extremum; `delta` converts that in-block
// lane index into a global position. `IR` is the scalar type accepted by
// v_setall_<suffix>.
#define MINMAXIDX_REDUCE(suffix, suffix2, maxLimit, IR) \
template<typename T, typename VT, typename IT> CV_ALWAYS_INLINE void \
minMaxIdx_reduce_##suffix( VT &valMin, VT &valMax, IT &idxMin, IT &idxMax, IT &none, \
                           T &minVal, T &maxVal, size_t &minIdx, size_t &maxIdx, \
                           size_t delta ) \
{ \
    if ( v_check_any(idxMin != none) ) \
    { \
        minVal = v_reduce_min(valMin); \
        minIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)minVal) == valMin), \
                                               idxMin, v_setall_##suffix2(maxLimit))) + delta; \
    } \
    if ( v_check_any(idxMax != none) ) \
    { \
        maxVal = v_reduce_max(valMax); \
        maxIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)maxVal) == valMax), \
                                               idxMax, v_setall_##suffix2(maxLimit))) + delta; \
    } \
}
|
|
|
|
|
|
|
|
|
// Instantiate the reduction helper for every supported element type:
// (value-vector suffix, index-vector suffix, index sentinel, v_setall scalar).
// f32 uses (1 << 23) - 1 as the sentinel: the largest integer index that a
// 32-bit float lane can represent exactly.
MINMAXIDX_REDUCE(u8, u8, UCHAR_MAX, uchar)
MINMAXIDX_REDUCE(s8, u8, UCHAR_MAX, uchar)
MINMAXIDX_REDUCE(u16, u16, USHRT_MAX, ushort)
MINMAXIDX_REDUCE(s16, u16, USHRT_MAX, ushort)
MINMAXIDX_REDUCE(s32, u32, UINT_MAX, uint)
MINMAXIDX_REDUCE(f32, u32, (1 << 23) - 1, float)
#if CV_SIMD128_64F
MINMAXIDX_REDUCE(f64, u64, UINT_MAX, double)
#endif
|
|
|
|
|
|
|
|
|
// Scalar epilogue: processes the tail elements [j, len) the vector loop did
// not cover, then publishes the final extrema and positions to the caller.
template<typename T, typename WT> CV_ALWAYS_INLINE void
minMaxIdx_finish( const T* src, const uchar* mask, WT* minval, WT* maxval,
                  size_t* minidx, size_t* maxidx, WT minVal, WT maxVal,
                  size_t minIdx, size_t maxIdx, int len, size_t startidx,
                  int j )
{
    for( ; j < len; j++ )
    {
        // Skip masked-out elements (a null mask selects everything).
        if( mask && !mask[j] )
            continue;
        T val = src[j];
        if( val < minVal )
        {
            minVal = val;
            minIdx = startidx + j;
        }
        if( val > maxVal )
        {
            maxVal = val;
            maxIdx = startidx + j;
        }
    }

    // Write back the combined results.
    *minidx = minIdx;
    *maxidx = maxIdx;
    *minval = minVal;
    *maxval = maxVal;
}
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= v_uint8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
int minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
(int)0, (int)UCHAR_MAX, v_uint8x16::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - v_uint8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint8x16 inc = v_setall_u8(v_uint8x16::nlanes); |
|
|
|
|
v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1)); |
|
|
|
|
v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_uint8x16 valMin = v_setall_u8((uchar)minVal), valMax = v_setall_u8((uchar)maxVal); |
|
|
|
|
v_uint8x16 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint8x16 data = v_load(src + k); |
|
|
|
|
v_uint8x16 cmpMin = (data < valMin); |
|
|
|
|
v_uint8x16 cmpMax = (data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint8x16 data = v_load(src + k); |
|
|
|
|
v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8(); |
|
|
|
|
v_uint8x16 cmpMin = (data < valMin) & maskVal; |
|
|
|
|
v_uint8x16 cmpMax = (data > valMax) & maskVal; |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(cmpMin, data, valMin); |
|
|
|
|
valMax = v_select(cmpMax, data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_u8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= v_int8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
int minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
(int)SCHAR_MIN, (int)SCHAR_MAX, v_int8x16::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - v_int8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint8x16 inc = v_setall_u8(v_int8x16::nlanes); |
|
|
|
|
v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1)); |
|
|
|
|
v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_int8x16 valMin = v_setall_s8((schar)minVal), valMax = v_setall_s8((schar)maxVal); |
|
|
|
|
v_uint8x16 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int8x16 data = v_load(src + k); |
|
|
|
|
v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin); |
|
|
|
|
v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int8x16 data = v_load(src + k); |
|
|
|
|
v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8(); |
|
|
|
|
v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin) & maskVal; |
|
|
|
|
v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax) & maskVal; |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_s8(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_s8(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_s8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= v_uint16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
int minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
(int)0, (int)USHRT_MAX, v_uint16x8::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - v_uint16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint16x8 inc = v_setall_u16(v_uint16x8::nlanes); |
|
|
|
|
v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1)); |
|
|
|
|
v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_uint16x8 valMin = v_setall_u16((ushort)minVal), valMax = v_setall_u16((ushort)maxVal); |
|
|
|
|
v_uint16x8 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint16x8 data = v_load(src + k); |
|
|
|
|
v_uint16x8 cmpMin = (data < valMin); |
|
|
|
|
v_uint16x8 cmpMax = (data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint16x8 data = v_load(src + k); |
|
|
|
|
v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); |
|
|
|
|
v_uint16x8 cmpMin = (data < valMin) & maskVal; |
|
|
|
|
v_uint16x8 cmpMax = (data > valMax) & maskVal; |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(cmpMin, data, valMin); |
|
|
|
|
valMax = v_select(cmpMax, data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_u16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= v_int16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
int minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
(int)SHRT_MIN, (int)SHRT_MAX, v_int16x8::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - v_int16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint16x8 inc = v_setall_u16(v_int16x8::nlanes); |
|
|
|
|
v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1)); |
|
|
|
|
v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_int16x8 valMin = v_setall_s16((short)minVal), valMax = v_setall_s16((short)maxVal); |
|
|
|
|
v_uint16x8 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int16x8 data = v_load(src + k); |
|
|
|
|
v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin); |
|
|
|
|
v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int16x8 data = v_load(src + k); |
|
|
|
|
v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); |
|
|
|
|
v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin) & maskVal; |
|
|
|
|
v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax) & maskVal; |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_s16(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_s16(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_s16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= 2 * v_int32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j = 0, len0 = len & -(2 * v_int32x4::nlanes); |
|
|
|
|
int minVal = *minval, maxVal = *maxval; |
|
|
|
|
size_t minIdx = *minidx, maxIdx = *maxidx; |
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
v_uint32x4 inc = v_setall_u32(v_int32x4::nlanes); |
|
|
|
|
v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1)); |
|
|
|
|
v_uint32x4 idxStart(0, 1, 2, 3); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_int32x4 valMin = v_setall_s32(minVal), valMax = v_setall_s32(maxVal); |
|
|
|
|
v_uint32x4 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int32x4 data = v_load(src + k); |
|
|
|
|
v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin); |
|
|
|
|
v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_int32x4::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u32(data < valMin); |
|
|
|
|
cmpMax = v_reinterpret_as_u32(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_int32x4 data = v_load(src + k); |
|
|
|
|
v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); |
|
|
|
|
v_int32x4 maskVal1, maskVal2; |
|
|
|
|
v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); |
|
|
|
|
v_uint32x4 cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal1); |
|
|
|
|
v_uint32x4 cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal1); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_int32x4::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal2); |
|
|
|
|
cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal2); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_s32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128 |
|
|
|
|
if ( len >= 2 * v_float32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
float minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
FLT_MIN, FLT_MAX, 2 * v_float32x4::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - 2 * v_float32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint32x4 inc = v_setall_u32(v_float32x4::nlanes); |
|
|
|
|
v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1)); |
|
|
|
|
v_uint32x4 idxStart(0, 1, 2, 3); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_float32x4 valMin = v_setall_f32(minVal), valMax = v_setall_f32(maxVal); |
|
|
|
|
v_uint32x4 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_float32x4 data = v_load(src + k); |
|
|
|
|
v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin); |
|
|
|
|
v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_float32x4::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u32(data < valMin); |
|
|
|
|
cmpMax = v_reinterpret_as_u32(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_float32x4 data = v_load(src + k); |
|
|
|
|
v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); |
|
|
|
|
v_int32x4 maskVal1, maskVal2; |
|
|
|
|
v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); |
|
|
|
|
v_uint32x4 cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal1); |
|
|
|
|
v_uint32x4 cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal1); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_float32x4::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal2); |
|
|
|
|
cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal2); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_f32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval, |
|
|
|
|
size_t* minidx, size_t* maxidx, int len, size_t startidx ) |
|
|
|
|
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); } |
|
|
|
|
{ |
|
|
|
|
#if CV_SIMD128_64F |
|
|
|
|
if ( len >= 4 * v_float64x2::nlanes ) |
|
|
|
|
{ |
|
|
|
|
int j, len0; |
|
|
|
|
double minVal, maxVal; |
|
|
|
|
size_t minIdx, maxIdx; |
|
|
|
|
|
|
|
|
|
minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx, |
|
|
|
|
DBL_MIN, DBL_MAX, 4 * v_float64x2::nlanes, len, startidx, j, len0 ); |
|
|
|
|
|
|
|
|
|
if ( j <= len0 - 4 * v_float64x2::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_uint64x2 inc = v_setall_u64(v_float64x2::nlanes); |
|
|
|
|
v_uint64x2 none = v_reinterpret_as_u64(v_setall_s64(-1)); |
|
|
|
|
v_uint64x2 idxStart(0, 1); |
|
|
|
|
|
|
|
|
|
do |
|
|
|
|
{ |
|
|
|
|
v_float64x2 valMin = v_setall_f64(minVal), valMax = v_setall_f64(maxVal); |
|
|
|
|
v_uint64x2 idx = idxStart, idxMin = none, idxMax = none; |
|
|
|
|
|
|
|
|
|
int k = j; |
|
|
|
|
size_t delta = startidx + j; |
|
|
|
|
|
|
|
|
|
if ( !mask ) |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_float64x2 data = v_load(src + k); |
|
|
|
|
v_uint64x2 cmpMin = v_reinterpret_as_u64(data < valMin); |
|
|
|
|
v_uint64x2 cmpMax = v_reinterpret_as_u64(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_float64x2::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(data < valMin); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + 2 * v_float64x2::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(data < valMin); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + 3 * v_float64x2::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(data < valMin); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(data > valMax); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_min(data, valMin); |
|
|
|
|
valMax = v_max(data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes ) |
|
|
|
|
{ |
|
|
|
|
v_float64x2 data = v_load(src + k); |
|
|
|
|
v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16(); |
|
|
|
|
v_int32x4 maskVal1, maskVal2; |
|
|
|
|
v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2); |
|
|
|
|
v_int64x2 maskVal3, maskVal4; |
|
|
|
|
v_expand(maskVal1, maskVal3, maskVal4); |
|
|
|
|
v_uint64x2 cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3); |
|
|
|
|
v_uint64x2 cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + v_float64x2::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + 2 * v_float64x2::nlanes); |
|
|
|
|
v_expand(maskVal2, maskVal3, maskVal4); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
data = v_load(src + k + 3 * v_float64x2::nlanes); |
|
|
|
|
cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4); |
|
|
|
|
cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4); |
|
|
|
|
idxMin = v_select(cmpMin, idx, idxMin); |
|
|
|
|
idxMax = v_select(cmpMax, idx, idxMax); |
|
|
|
|
valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin); |
|
|
|
|
valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax); |
|
|
|
|
idx += inc; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
j = k; |
|
|
|
|
|
|
|
|
|
minMaxIdx_reduce_f64( valMin, valMax, idxMin, idxMax, none, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, delta ); |
|
|
|
|
} |
|
|
|
|
while ( j < len0 ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, |
|
|
|
|
minIdx, maxIdx, len, startidx, j ); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx); |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Common signature of the type-specialized minMaxIdx implementations above,
// used to build the per-depth dispatch table.
typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
|
|
|
|
|
|
|
|
|