@@ -71,33 +71,759 @@ minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
    *_maxVal = maxVal;
}
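// SIMD path for minMaxIdx. Every element type below follows the same three-stage pattern:
//   1. minMaxIdx_init()       - scalar prologue: loads the running min/max and, if the incoming
//                               start values lie outside the representable range of the element
//                               type, scans scalars until both have been replaced; it also rounds
//                               the vectorizable length down to a multiple of the lane count.
//   2. a per-type vector loop - tracks candidate extrema and their lane indices in vector
//                               registers, processed in blocks so the per-lane index counters
//                               cannot overflow.
//   3. minMaxIdx_reduce_*() / minMaxIdx_finish() - fold the lanes back into scalar results and
//                               handle the remaining tail elements.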
#if CV_SIMD128
template<typename T, typename WT> CV_ALWAYS_INLINE void
minMaxIdx_init( const T* src, const uchar* mask, WT* minval, WT* maxval,
                size_t* minidx, size_t* maxidx, WT& minVal, WT& maxVal,
                size_t& minIdx, size_t& maxIdx, const WT minInit, const WT maxInit,
                const int nlanes, int len, size_t startidx, int& j, int& len0 )
{
    len0 = len & -nlanes;
    j = 0;

    minVal = *minval, maxVal = *maxval;
    minIdx = *minidx, maxIdx = *maxidx;

    // To handle start values out of range
    if ( minVal < minInit || maxVal < minInit || minVal > maxInit || maxVal > maxInit )
    {
        uchar done = 0x00;

        for ( ; (j < len) && (done != 0x03); j++ )
        {
            if ( !mask || mask[j] )
            {
                T val = src[j];
                if ( val < minVal )
                {
                    minVal = val;
                    minIdx = startidx + j;
                    done |= 0x01;
                }
                if ( val > maxVal )
                {
                    maxVal = val;
                    maxIdx = startidx + j;
                    done |= 0x02;
                }
            }
        }

        len0 = j + ((len - j) & -nlanes);
    }
}
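// Helper overloads for 64-bit lanes: v_reduce_min/v_reduce_max for v_float64x2, a v_reduce_min
// for v_uint64x2, and a bitwise v_select for v_uint64x2, which are not assumed to be available
// from the universal intrinsics on every SIMD128 backend. They are only used by the f64 path
// below.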
#if CV_SIMD128_64F
CV_ALWAYS_INLINE double v_reduce_min(const v_float64x2& a)
{
    double CV_DECL_ALIGNED(32) idx[2];
    v_store_aligned(idx, a);
    return std::min(idx[0], idx[1]);
}

CV_ALWAYS_INLINE double v_reduce_max(const v_float64x2& a)
{
    double CV_DECL_ALIGNED(32) idx[2];
    v_store_aligned(idx, a);
    return std::max(idx[0], idx[1]);
}

CV_ALWAYS_INLINE uint64_t v_reduce_min(const v_uint64x2& a)
{
    uint64_t CV_DECL_ALIGNED(32) idx[2];
    v_store_aligned(idx, a);
    return std::min(idx[0], idx[1]);
}

CV_ALWAYS_INLINE v_uint64x2 v_select(const v_uint64x2& mask, const v_uint64x2& a, const v_uint64x2& b)
{
    return b ^ ((a ^ b) & mask);
}
#endif
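// Horizontal reduction used after every block of the vector loops. If any lane ever recorded an
// improvement (idxMin/idxMax differ from the 'none' marker), the per-lane extrema are reduced to
// a scalar, and the smallest lane index holding that extremum is recovered: lanes that do not
// hold it are replaced by the maxLimit sentinel before the index reduction, so maxLimit must
// exceed any in-block lane index (the per-type block limits below guarantee this). 'delta'
// converts the lane index back to an absolute position.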
#define MINMAXIDX_REDUCE(suffix, suffix2, maxLimit, IR) \
template<typename T, typename VT, typename IT> CV_ALWAYS_INLINE void \
minMaxIdx_reduce_##suffix( VT& valMin, VT& valMax, IT& idxMin, IT& idxMax, IT& none, \
                           T& minVal, T& maxVal, size_t& minIdx, size_t& maxIdx, \
                           size_t delta ) \
{ \
    if ( v_check_any(idxMin != none) ) \
    { \
        minVal = v_reduce_min(valMin); \
        minIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)minVal) == valMin), \
                                               idxMin, v_setall_##suffix2(maxLimit))) + delta; \
    } \
    if ( v_check_any(idxMax != none) ) \
    { \
        maxVal = v_reduce_max(valMax); \
        maxIdx = (size_t)v_reduce_min(v_select(v_reinterpret_as_##suffix2(v_setall_##suffix((IR)maxVal) == valMax), \
                                               idxMax, v_setall_##suffix2(maxLimit))) + delta; \
    } \
}
MINMAXIDX_REDUCE(u8, u8, UCHAR_MAX, uchar)
MINMAXIDX_REDUCE(s8, u8, UCHAR_MAX, uchar)
MINMAXIDX_REDUCE(u16, u16, USHRT_MAX, ushort)
MINMAXIDX_REDUCE(s16, u16, USHRT_MAX, ushort)
MINMAXIDX_REDUCE(s32, u32, UINT_MAX, uint)
MINMAXIDX_REDUCE(f32, u32, (1 << 23) - 1, float)
#if CV_SIMD128_64F
MINMAXIDX_REDUCE(f64, u64, UINT_MAX, double)
#endif
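// Scalar epilogue: handles the tail elements past the last full vector block and writes the
// final min/max values and indices back through the output pointers.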
template<typename T, typename WT> CV_ALWAYS_INLINE void
minMaxIdx_finish( const T* src, const uchar* mask, WT* minval, WT* maxval,
                  size_t* minidx, size_t* maxidx, WT minVal, WT maxVal,
                  size_t minIdx, size_t maxIdx, int len, size_t startidx,
                  int j )
{
    for ( ; j < len; j++ )
    {
        if ( !mask || mask[j] )
        {
            T val = src[j];
            if ( val < minVal )
            {
                minVal = val;
                minIdx = startidx + j;
            }
            if ( val > maxVal )
            {
                maxVal = val;
                maxIdx = startidx + j;
            }
        }
    }

    *minidx = minIdx;
    *maxidx = maxIdx;
    *minval = minVal;
    *maxval = maxVal;
}
#endif
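// 8-bit unsigned elements. Lane indices are tracked in a v_uint8x16, so each block is capped at
// 15 vector iterations (k < j + 15 * nlanes): the largest lane index is then 15 + 14 * 16 = 239,
// safely below the UCHAR_MAX sentinel used by minMaxIdx_reduce_u8.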
static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
                         size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= v_uint8x16::nlanes )
    {
        int j, len0;
        int minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        (int)0, (int)UCHAR_MAX, v_uint8x16::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - v_uint8x16::nlanes )
        {
            v_uint8x16 inc = v_setall_u8(v_uint8x16::nlanes);
            v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1));
            v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

            do
            {
                v_uint8x16 valMin = v_setall_u8((uchar)minVal), valMax = v_setall_u8((uchar)maxVal);
                v_uint8x16 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes )
                    {
                        v_uint8x16 data = v_load(src + k);
                        v_uint8x16 cmpMin = (data < valMin);
                        v_uint8x16 cmpMax = (data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 15 * v_uint8x16::nlanes); k += v_uint8x16::nlanes )
                    {
                        v_uint8x16 data = v_load(src + k);
                        v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8();
                        v_uint8x16 cmpMin = (data < valMin) & maskVal;
                        v_uint8x16 cmpMax = (data > valMax) & maskVal;
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(cmpMin, data, valMin);
                        valMax = v_select(cmpMax, data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_u8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                     minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
                         size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= v_int8x16::nlanes )
    {
        int j, len0;
        int minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        (int)SCHAR_MIN, (int)SCHAR_MAX, v_int8x16::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - v_int8x16::nlanes )
        {
            v_uint8x16 inc = v_setall_u8(v_int8x16::nlanes);
            v_uint8x16 none = v_reinterpret_as_u8(v_setall_s8(-1));
            v_uint8x16 idxStart(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

            do
            {
                v_int8x16 valMin = v_setall_s8((schar)minVal), valMax = v_setall_s8((schar)maxVal);
                v_uint8x16 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes )
                    {
                        v_int8x16 data = v_load(src + k);
                        v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin);
                        v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 15 * v_int8x16::nlanes); k += v_int8x16::nlanes )
                    {
                        v_int8x16 data = v_load(src + k);
                        v_uint8x16 maskVal = v_load(mask + k) != v_setzero_u8();
                        v_uint8x16 cmpMin = v_reinterpret_as_u8(data < valMin) & maskVal;
                        v_uint8x16 cmpMax = v_reinterpret_as_u8(data > valMax) & maskVal;
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_s8(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_s8(cmpMax), data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_s8( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                     minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
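// 16-bit unsigned elements. Indices live in a v_uint16x8, so each block is capped at 8191 vector
// iterations, giving a largest lane index of 7 + 8190 * 8 = 65527, below the USHRT_MAX sentinel.
// The optional mask is widened from 8-bit to 16-bit lanes with v_load_expand.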
static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= v_uint16x8::nlanes )
    {
        int j, len0;
        int minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        (int)0, (int)USHRT_MAX, v_uint16x8::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - v_uint16x8::nlanes )
        {
            v_uint16x8 inc = v_setall_u16(v_uint16x8::nlanes);
            v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1));
            v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7);

            do
            {
                v_uint16x8 valMin = v_setall_u16((ushort)minVal), valMax = v_setall_u16((ushort)maxVal);
                v_uint16x8 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes )
                    {
                        v_uint16x8 data = v_load(src + k);
                        v_uint16x8 cmpMin = (data < valMin);
                        v_uint16x8 cmpMax = (data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 8191 * v_uint16x8::nlanes); k += v_uint16x8::nlanes )
                    {
                        v_uint16x8 data = v_load(src + k);
                        v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16();
                        v_uint16x8 cmpMin = (data < valMin) & maskVal;
                        v_uint16x8 cmpMax = (data > valMax) & maskVal;
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(cmpMin, data, valMin);
                        valMax = v_select(cmpMax, data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_u16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                      minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= v_int16x8::nlanes )
    {
        int j, len0;
        int minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        (int)SHRT_MIN, (int)SHRT_MAX, v_int16x8::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - v_int16x8::nlanes )
        {
            v_uint16x8 inc = v_setall_u16(v_int16x8::nlanes);
            v_uint16x8 none = v_reinterpret_as_u16(v_setall_s16(-1));
            v_uint16x8 idxStart(0, 1, 2, 3, 4, 5, 6, 7);

            do
            {
                v_int16x8 valMin = v_setall_s16((short)minVal), valMax = v_setall_s16((short)maxVal);
                v_uint16x8 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes )
                    {
                        v_int16x8 data = v_load(src + k);
                        v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin);
                        v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 8191 * v_int16x8::nlanes); k += v_int16x8::nlanes )
                    {
                        v_int16x8 data = v_load(src + k);
                        v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16();
                        v_uint16x8 cmpMin = v_reinterpret_as_u16(data < valMin) & maskVal;
                        v_uint16x8 cmpMax = v_reinterpret_as_u16(data > valMax) & maskVal;
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_s16(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_s16(cmpMax), data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_s16( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                      minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
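// 32-bit signed elements. The initialization is done inline (int start values are always
// representable, so no scalar prologue is needed), two vectors are processed per iteration, and
// the 8-byte mask chunk is expanded once into two 32-bit lane masks. The block limit of
// 32766 * 2 * nlanes elements bounds how large the lane index counters grow between reductions.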
static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= 2 * v_int32x4::nlanes )
    {
        int j = 0, len0 = len & -(2 * v_int32x4::nlanes);
        int minVal = *minval, maxVal = *maxval;
        size_t minIdx = *minidx, maxIdx = *maxidx;

        {
            v_uint32x4 inc = v_setall_u32(v_int32x4::nlanes);
            v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1));
            v_uint32x4 idxStart(0, 1, 2, 3);

            do
            {
                v_int32x4 valMin = v_setall_s32(minVal), valMax = v_setall_s32(maxVal);
                v_uint32x4 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes )
                    {
                        v_int32x4 data = v_load(src + k);
                        v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin);
                        v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_int32x4::nlanes);
                        cmpMin = v_reinterpret_as_u32(data < valMin);
                        cmpMax = v_reinterpret_as_u32(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 32766 * 2 * v_int32x4::nlanes); k += 2 * v_int32x4::nlanes )
                    {
                        v_int32x4 data = v_load(src + k);
                        v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16();
                        v_int32x4 maskVal1, maskVal2;
                        v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2);
                        v_uint32x4 cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal1);
                        v_uint32x4 cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal1);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_int32x4::nlanes);
                        cmpMin = v_reinterpret_as_u32((data < valMin) & maskVal2);
                        cmpMax = v_reinterpret_as_u32((data > valMax) & maskVal2);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_s32(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_s32(cmpMax), data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_s32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                      minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
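// 32-bit float elements. Comparison results are reinterpreted to u32 masks so the same unsigned
// index bookkeeping applies; the reduction sentinel is (1 << 23) - 1, which remains well above
// the largest in-block lane index allowed by the 32766 * 2 * nlanes block limit.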
static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128
    if ( len >= 2 * v_float32x4::nlanes )
    {
        int j, len0;
        float minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        FLT_MIN, FLT_MAX, 2 * v_float32x4::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - 2 * v_float32x4::nlanes )
        {
            v_uint32x4 inc = v_setall_u32(v_float32x4::nlanes);
            v_uint32x4 none = v_reinterpret_as_u32(v_setall_s32(-1));
            v_uint32x4 idxStart(0, 1, 2, 3);

            do
            {
                v_float32x4 valMin = v_setall_f32(minVal), valMax = v_setall_f32(maxVal);
                v_uint32x4 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes )
                    {
                        v_float32x4 data = v_load(src + k);
                        v_uint32x4 cmpMin = v_reinterpret_as_u32(data < valMin);
                        v_uint32x4 cmpMax = v_reinterpret_as_u32(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_float32x4::nlanes);
                        cmpMin = v_reinterpret_as_u32(data < valMin);
                        cmpMax = v_reinterpret_as_u32(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 32766 * 2 * v_float32x4::nlanes); k += 2 * v_float32x4::nlanes )
                    {
                        v_float32x4 data = v_load(src + k);
                        v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16();
                        v_int32x4 maskVal1, maskVal2;
                        v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2);
                        v_uint32x4 cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal1);
                        v_uint32x4 cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal1);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_float32x4::nlanes);
                        cmpMin = v_reinterpret_as_u32(v_reinterpret_as_s32(data < valMin) & maskVal2);
                        cmpMax = v_reinterpret_as_u32(v_reinterpret_as_s32(data > valMax) & maskVal2);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f32(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f32(cmpMax), data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_f32( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                      minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
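// 64-bit float elements (requires CV_SIMD128_64F). Four v_float64x2 vectors are handled per
// iteration, the mask bytes are expanded in two steps up to 64-bit lane masks, and the index
// bookkeeping relies on the v_uint64x2 helpers defined above, with UINT_MAX as the reduction
// sentinel.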
static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{
#if CV_SIMD128_64F
    if ( len >= 4 * v_float64x2::nlanes )
    {
        int j, len0;
        double minVal, maxVal;
        size_t minIdx, maxIdx;

        minMaxIdx_init( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal, minIdx, maxIdx,
                        DBL_MIN, DBL_MAX, 4 * v_float64x2::nlanes, len, startidx, j, len0 );

        if ( j <= len0 - 4 * v_float64x2::nlanes )
        {
            v_uint64x2 inc = v_setall_u64(v_float64x2::nlanes);
            v_uint64x2 none = v_reinterpret_as_u64(v_setall_s64(-1));
            v_uint64x2 idxStart(0, 1);

            do
            {
                v_float64x2 valMin = v_setall_f64(minVal), valMax = v_setall_f64(maxVal);
                v_uint64x2 idx = idxStart, idxMin = none, idxMax = none;

                int k = j;
                size_t delta = startidx + j;

                if ( !mask )
                {
                    for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes )
                    {
                        v_float64x2 data = v_load(src + k);
                        v_uint64x2 cmpMin = v_reinterpret_as_u64(data < valMin);
                        v_uint64x2 cmpMax = v_reinterpret_as_u64(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_float64x2::nlanes);
                        cmpMin = v_reinterpret_as_u64(data < valMin);
                        cmpMax = v_reinterpret_as_u64(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;

                        data = v_load(src + k + 2 * v_float64x2::nlanes);
                        cmpMin = v_reinterpret_as_u64(data < valMin);
                        cmpMax = v_reinterpret_as_u64(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;

                        data = v_load(src + k + 3 * v_float64x2::nlanes);
                        cmpMin = v_reinterpret_as_u64(data < valMin);
                        cmpMax = v_reinterpret_as_u64(data > valMax);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_min(data, valMin);
                        valMax = v_max(data, valMax);
                        idx += inc;
                    }
                }
                else
                {
                    for( ; k < std::min(len0, j + 32764 * 4 * v_float64x2::nlanes); k += 4 * v_float64x2::nlanes )
                    {
                        v_float64x2 data = v_load(src + k);
                        v_uint16x8 maskVal = v_load_expand(mask + k) != v_setzero_u16();
                        v_int32x4 maskVal1, maskVal2;
                        v_expand(v_reinterpret_as_s16(maskVal), maskVal1, maskVal2);
                        v_int64x2 maskVal3, maskVal4;
                        v_expand(maskVal1, maskVal3, maskVal4);
                        v_uint64x2 cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3);
                        v_uint64x2 cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax);
                        idx += inc;

                        data = v_load(src + k + v_float64x2::nlanes);
                        cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4);
                        cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax);
                        idx += inc;

                        data = v_load(src + k + 2 * v_float64x2::nlanes);
                        v_expand(maskVal2, maskVal3, maskVal4);
                        cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal3);
                        cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal3);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax);
                        idx += inc;

                        data = v_load(src + k + 3 * v_float64x2::nlanes);
                        cmpMin = v_reinterpret_as_u64(v_reinterpret_as_s64(data < valMin) & maskVal4);
                        cmpMax = v_reinterpret_as_u64(v_reinterpret_as_s64(data > valMax) & maskVal4);
                        idxMin = v_select(cmpMin, idx, idxMin);
                        idxMax = v_select(cmpMax, idx, idxMax);
                        valMin = v_select(v_reinterpret_as_f64(cmpMin), data, valMin);
                        valMax = v_select(v_reinterpret_as_f64(cmpMax), data, valMax);
                        idx += inc;
                    }
                }

                j = k;

                minMaxIdx_reduce_f64( valMin, valMax, idxMin, idxMax, none, minVal, maxVal,
                                      minIdx, maxIdx, delta );
            }
            while ( j < len0 );
        }

        minMaxIdx_finish( src, mask, minval, maxval, minidx, maxidx, minVal, maxVal,
                          minIdx, maxIdx, len, startidx, j );
    }
    else
    {
        minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
    }
#else
    minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx );
#endif
}
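// Function-pointer type shared by the per-type implementations above; presumably they are cast
// to this common signature by the dispatch table that selects the implementation from the matrix
// depth (outside this hunk).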
typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);