@@ -215,7 +215,7 @@ template<typename T1, typename Tvec>
 struct op_add
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a + b; }
+    { return v_add(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return c_add(a, b); }
 };
@@ -225,7 +225,7 @@ template<typename T1, typename Tvec>
 struct op_sub
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a - b; }
+    { return v_sub(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return c_sub(a, b); }
 };
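
Note on the pattern repeated throughout this patch: operator overloads on the vector types are not available when CV_SIMD_SCALABLE is set (the scalable RISC-V RVV backend uses sizeless types), so every `a + b` style expression moves to a named intrinsic. A minimal sketch of the new style, assuming a recent OpenCV with the new universal intrinsics API:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
// Hypothetical helper, not part of the patch: multiply-add in the named style.
inline v_float32 fma_like(const v_float32& a, const v_float32& x, const v_float32& y)
{
    return v_add(v_mul(a, x), y); // old operator style: a * x + y
}
#endif
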
@@ -262,7 +262,7 @@ struct op_absdiff
 template<>
 struct op_absdiff<schar, v_int8>
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_int8 r(const v_int8& a, const v_int8& b)
     { return v_absdiffs(a, b); }
 #endif
@@ -272,7 +272,7 @@ struct op_absdiff<schar, v_int8>
 template<>
 struct op_absdiff<short, v_int16>
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_int16 r(const v_int16& a, const v_int16& b)
     { return v_absdiffs(a, b); }
 #endif
@@ -282,7 +282,7 @@ struct op_absdiff<short, v_int16>
 template<>
 struct op_absdiff<int, v_int32>
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_int32 r(const v_int32& a, const v_int32& b)
     { return v_reinterpret_as_s32(v_absdiff(a, b)); }
 #endif
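
These signed specializations use v_absdiffs, the saturating form of absolute difference, so the result clamps at the type's maximum instead of wrapping. An illustration (a sketch, not code from the patch):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline v_int8 absdiff_s8(const v_int8& a, const v_int8& b)
{
    return v_absdiffs(a, b); // saturates: |(-128) - 127| -> 127, not a wrapped value
}
#endif
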
@@ -295,7 +295,7 @@ template<typename T1, typename Tvec>
 struct op_or
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a | b; }
+    { return v_or(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return a | b; }
 };
@@ -303,7 +303,7 @@ template<typename T1, typename Tvec>
 struct op_xor
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a ^ b; }
+    { return v_xor(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return a ^ b; }
 };
@@ -311,7 +311,7 @@ template<typename T1, typename Tvec>
 struct op_and
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a & b; }
+    { return v_and(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return a & b; }
 };
@@ -320,14 +320,14 @@ struct op_not
 {
     // ignored b from loader level
     static inline Tvec r(const Tvec& a)
-    { return ~a; }
+    { return v_not(a); }
     static inline T1 r(T1 a, T1)
     { return ~a; }
 };
 
 //////////////////////////// Loaders /////////////////////////////////
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
 
 template<template<typename T1, typename Tvec> class OP, typename T1, typename Tvec>
 struct bin_loader
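
Bitwise NOT follows the same migration, but only on the vector side: `~a` on a plain integer lane stays valid C++, so the scalar overload is untouched. Sketch:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline v_uint8 invert(const v_uint8& a)
{
    return v_not(a); // old operator style: ~a
}
#endif
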
@@ -392,13 +392,13 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height)
 {
     typedef OP<T1, Tvec> op;
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     typedef bin_loader<OP, T1, Tvec> ldr;
-    enum {wide_step = Tvec::nlanes};
+    const int wide_step = VTraits<Tvec>::vlanes();
     #if !CV_NEON && CV_SIMD_WIDTH == 16
-        enum {wide_step_l = wide_step * 2};
+        const int wide_step_l = wide_step * 2;
     #else
-        enum {wide_step_l = wide_step};
+        const int wide_step_l = wide_step;
     #endif
 #endif // CV_SIMD
 
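
The enum-to-const-int change is the second half of the migration: under CV_SIMD_SCALABLE the lane count is a run-time quantity (on RVV it depends on the hardware vector length), so VTraits<Tvec>::vlanes() cannot appear in an enum or other constant expression. A minimal sketch of the resulting loop shape, with a hypothetical helper name add_f32:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
void add_f32(const float* a, const float* b, float* dst, int n)
{
    const int step = VTraits<v_float32>::vlanes(); // run-time lane count
    int x = 0;
    for (; x <= n - step; x += step)
        v_store(dst + x, v_add(vx_load(a + x), vx_load(b + x)));
    for (; x < n; ++x) // scalar tail
        dst[x] = a[x] + b[x];
}
#endif
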
@@ -410,7 +410,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     {
         int x = 0;
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     #if !CV_NEON && !CV_MSA
         if (is_aligned(src1, src2, dst))
         {
@@ -583,7 +583,7 @@ template<typename T1, typename Tvec>
 struct op_cmplt
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a < b; }
+    { return v_lt(a, b); }
     static inline uchar r(T1 a, T1 b)
     { return (uchar)-(int)(a < b); }
 };
@@ -592,7 +592,7 @@ template<typename T1, typename Tvec>
 struct op_cmple
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a <= b; }
+    { return v_le(a, b); }
     static inline uchar r(T1 a, T1 b)
     { return (uchar)-(int)(a <= b); }
 };
@@ -601,7 +601,7 @@ template<typename T1, typename Tvec>
 struct op_cmpeq
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a == b; }
+    { return v_eq(a, b); }
     static inline uchar r(T1 a, T1 b)
     { return (uchar)-(int)(a == b); }
 };
@@ -610,14 +610,14 @@ template<typename T1, typename Tvec>
 struct op_cmpne
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a != b; }
+    { return v_ne(a, b); }
     static inline uchar r(T1 a, T1 b)
     { return (uchar)-(int)(a != b); }
 };
 
 //////////////////////////// Loaders /////////////////////////////////
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
 // todo: add support for RW alignment & stream
 template<int nload, template<typename T1, typename Tvec> class OP, typename T1, typename Tvec>
 struct cmp_loader_n
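
The named comparisons return a per-lane all-ones/all-zeros mask, mirroring the scalar path's (uchar)-(int)(a != b) trick that turns true into 0xFF. A small sketch (illustrative helper, not from the patch):

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline v_uint8 lt_mask(const v_uint8& a, const v_uint8& b)
{
    return v_lt(a, b); // each lane: 0xFF where a < b, else 0x00
}
#endif
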
@@ -642,10 +642,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 struct cmp_loader_n<sizeof(ushort), OP, T1, Tvec>
 {
     typedef OP<T1, Tvec> op;
-    enum {step = Tvec::nlanes};
 
     static inline void l(const T1* src1, const T1* src2, uchar* dst)
     {
+        const int step = VTraits<Tvec>::vlanes();
         Tvec c0 = op::r(vx_load(src1), vx_load(src2));
         Tvec c1 = op::r(vx_load(src1 + step), vx_load(src2 + step));
         v_store(dst, v_pack_b(v_reinterpret_as_u16(c0), v_reinterpret_as_u16(c1)));
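
Each loader narrows its comparison masks into the uchar destination with v_pack_b, which is why the 16-bit variant above consumes two input vectors per store (and the 32- and 64-bit variants below four and eight). The packing step in isolation, as a sketch:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline void store_masks(const v_uint16& m0, const v_uint16& m1, uchar* dst)
{
    v_store(dst, v_pack_b(m0, m1)); // per lane: 0xFFFF -> 0xFF, 0x0000 -> 0x00
}
#endif
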
@@ -656,10 +656,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 struct cmp_loader_n<sizeof(unsigned), OP, T1, Tvec>
 {
     typedef OP<T1, Tvec> op;
-    enum {step = Tvec::nlanes};
 
     static inline void l(const T1* src1, const T1* src2, uchar* dst)
     {
+        const int step = VTraits<Tvec>::vlanes();
         v_uint32 c0 = v_reinterpret_as_u32(op::r(vx_load(src1), vx_load(src2)));
         v_uint32 c1 = v_reinterpret_as_u32(op::r(vx_load(src1 + step), vx_load(src2 + step)));
         v_uint32 c2 = v_reinterpret_as_u32(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2)));
@@ -672,10 +672,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 struct cmp_loader_n<sizeof(double), OP, T1, Tvec>
 {
     typedef OP<T1, Tvec> op;
-    enum {step = Tvec::nlanes};
 
     static inline void l(const T1* src1, const T1* src2, uchar* dst)
     {
+        const int step = VTraits<Tvec>::vlanes();
         v_uint64 c0 = v_reinterpret_as_u64(op::r(vx_load(src1), vx_load(src2)));
         v_uint64 c1 = v_reinterpret_as_u64(op::r(vx_load(src1 + step), vx_load(src2 + step)));
         v_uint64 c2 = v_reinterpret_as_u64(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2)));
@@ -697,9 +697,9 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
 static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height)
 {
     typedef OP<T1, Tvec> op;
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     typedef cmp_loader_n<sizeof(T1), OP, T1, Tvec> ldr;
-    enum {wide_step = Tvec::nlanes * sizeof(T1)};
+    const int wide_step = VTraits<Tvec>::vlanes() * sizeof(T1);
 #endif // CV_SIMD
 
     step1 /= sizeof(T1);
@@ -709,7 +709,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
     {
         int x = 0;
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
         for (; x <= width - wide_step; x += wide_step)
         {
             ldr::l(src1 + x, src2 + x, dst + x);
@@ -876,7 +876,7 @@ DEFINE_SIMD_ALL(cmp)
 
 //////////////////////////// Loaders ///////////////////////////////
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
 // todo: add support for RW alignment & stream
 template<int nload, template<typename T1, typename T2, typename Tvec> class OP, typename T1, typename T2, typename Tvec>
 struct scalar_loader_n
@@ -1009,10 +1009,10 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T2
 struct scalar_loader_n<sizeof(int), OP, int, T2, v_int32>
 {
     typedef OP<int, T2, v_int32> op;
-    enum {step = v_int32::nlanes};
 
     static inline void l(const int* src1, const int* src2, const T2* scalar, int* dst)
     {
+        const int step = VTraits<v_int32>::vlanes();
         v_int32 v_src1 = vx_load(src1);
         v_int32 v_src2 = vx_load(src2);
         v_int32 v_src1s = vx_load(src1 + step);
@@ -1039,6 +1039,7 @@ struct scalar_loader_n<sizeof(int), OP, int, T2, v_int32>
     static inline void l(const int* src1, const T2* scalar, int* dst)
     {
+        const int step = VTraits<v_int32>::vlanes();
         v_int32 v_src1 = vx_load(src1);
         v_int32 v_src1s = vx_load(src1 + step);
@@ -1064,10 +1065,9 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T2
 struct scalar_loader_n<sizeof(float), OP, float, T2, v_float32>
 {
     typedef OP<float, T2, v_float32> op;
-    enum {step = v_float32::nlanes};
-
     static inline void l(const float* src1, const float* src2, const T2* scalar, float* dst)
     {
+        const int step = VTraits<v_float32>::vlanes();
         v_float32 v_src1 = vx_load(src1);
         v_float32 v_src2 = vx_load(src2);
         v_float32 v_src1s = vx_load(src1 + step);
@@ -1082,6 +1082,7 @@ struct scalar_loader_n<sizeof(float), OP, float, T2, v_float32>
     static inline void l(const float* src1, const T2* scalar, float* dst)
     {
+        const int step = VTraits<v_float32>::vlanes();
         v_float32 v_src1 = vx_load(src1);
         v_float32 v_src1s = vx_load(src1 + step);
@@ -1258,10 +1259,10 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
                         T1* dst, size_t step, int width, int height, const T2* scalar)
 {
     typedef OP<T1, T2, Tvec> op;
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     typedef scalar_loader_n<sizeof(T1), OP, T1, T2, Tvec> ldr;
-    const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 :
-                          sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes;
+    const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits<Tvec>::vlanes() * 2 :
+                          sizeof(T1) == sizeof(uchar) ? VTraits<Tvec>::vlanes() / 2 : VTraits<Tvec>::vlanes();
 #endif // CV_SIMD
 
     step1 /= sizeof(T1);
@@ -1272,7 +1273,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
     {
         int x = 0;
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
         for (; x <= width - wide_step; x += wide_step)
         {
             ldr::l(src1 + x, src2 + x, scalar, dst + x);
@@ -1304,10 +1305,10 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T1
 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar)
 {
     typedef OP<T1, T2, Tvec> op;
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     typedef scalar_loader_n<sizeof(T1), OP, T1, T2, Tvec> ldr;
-    const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 :
-                          sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes;
+    const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits<Tvec>::vlanes() * 2 :
+                          sizeof(T1) == sizeof(uchar) ? VTraits<Tvec>::vlanes() / 2 : VTraits<Tvec>::vlanes();
 #endif // CV_SIMD
 
     step1 /= sizeof(T1);
@@ -1317,7 +1318,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int
     {
         int x = 0;
 
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
         for (; x <= width - wide_step; x += wide_step)
        {
            ldr::l(src1 + x, scalar, dst + x);
@@ -1424,7 +1425,7 @@ template<typename T1, typename Tvec>
 struct op_mul
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a * b; }
+    { return v_mul(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return saturate_cast<T1>(a * b); }
 };
@@ -1432,11 +1433,11 @@ struct op_mul
 template<typename T1, typename T2, typename Tvec>
 struct op_mul_scale
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return v_scalar * a * b;
+        return v_mul(v_scalar, a, b);
     }
 #endif
     static inline T1 r(T1 a, T1 b, const T2* scalar)
@@ -1452,7 +1453,7 @@ struct op_mul_scale<double, double, v_float64>
     static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
     {
         const v_float64 v_scalar = vx_setall_f64(*scalar);
-        return v_scalar * a * b;
+        return v_mul(v_mul(v_scalar, a), b);
     }
 #endif
     static inline double r(double a, double b, const double* scalar)
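
Both rewrites above are equivalent: v_mul (like v_add) has a variadic overload in the new API, so the float path chains three operands in one call while the double path spells out the nesting. Assuming that overload, the two forms below compute the same product:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline v_float32 scaled_product(const v_float32& a, const v_float32& b, float s)
{
    const v_float32 vs = vx_setall_f32(s);
    return v_mul(vs, a, b); // variadic form; same result as v_mul(v_mul(vs, a), b)
}
#endif
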
@@ -1565,7 +1566,7 @@ template<typename T1, typename Tvec>
 struct op_div_f
 {
     static inline Tvec r(const Tvec& a, const Tvec& b)
-    { return a / b; }
+    { return v_div(a, b); }
     static inline T1 r(T1 a, T1 b)
     { return a / b; }
 };
@@ -1573,16 +1574,16 @@ struct op_div_f
 template<typename T1, typename T2, typename Tvec>
 struct op_div_scale
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return a * v_scalar / b;
+        return v_div(v_mul(a, v_scalar), b);
     }
     static inline Tvec pre(const Tvec& denom, const Tvec& res)
     {
-        const Tvec v_zero = vx_setall<typename Tvec::lane_type>(0);
-        return v_select(denom == v_zero, v_zero, res);
+        const Tvec v_zero = vx_setall<typename VTraits<Tvec>::lane_type>(0);
+        return v_select(v_eq(denom, v_zero), v_zero, res);
     }
 #endif
     static inline T1 r(T1 a, T1 denom, const T2* scalar)
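
The pre() hook masks out lanes whose denominator is zero before the result is stored; with operators gone, the compare moves to v_eq and the lane type is fetched through VTraits. The idiom in isolation, as a sketch:

#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

#if CV_SIMD || CV_SIMD_SCALABLE
inline v_float32 guard_div_by_zero(const v_float32& denom, const v_float32& res)
{
    const v_float32 v_zero = vx_setall_f32(0.0f);
    return v_select(v_eq(denom, v_zero), v_zero, res); // lane-wise: denom == 0 ? 0 : res
}
#endif
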
@@ -1595,11 +1596,11 @@ struct op_div_scale
 template<>
 struct op_div_scale<float, float, v_float32>
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return a * v_scalar / b;
+        return v_div(v_mul(a, v_scalar), b);
     }
 #endif
     static inline float r(float a, float denom, const float* scalar)
@@ -1613,7 +1614,7 @@ struct op_div_scale<double, double, v_float64>
     static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
     {
         const v_float64 v_scalar = vx_setall_f64(*scalar);
-        return a * v_scalar / b;
+        return v_div(v_mul(a, v_scalar), b);
     }
 #endif
     static inline double r(double a, double denom, const double* scalar)
@@ -1681,7 +1682,7 @@ DEFINE_SIMD_ALL(div, div_loop)
 template<typename T1, typename T2, typename Tvec>
 struct op_add_scale
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
     {
         const v_float32 v_alpha = vx_setall_f32(*scalar);
@@ -1714,7 +1715,7 @@ struct op_add_scale<double, double, v_float64>
 template<typename T1, typename T2, typename Tvec>
 struct op_add_weighted
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars)
     {
         const v_float32 v_alpha = vx_setall_f32(scalars[0]);
@@ -1831,16 +1832,16 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d)
 template<typename T1, typename T2, typename Tvec>
 struct op_recip
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const T2* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return v_scalar / a;
+        return v_div(v_scalar, a);
     }
     static inline Tvec pre(const Tvec& denom, const Tvec& res)
     {
-        const Tvec v_zero = vx_setall<typename Tvec::lane_type>(0);
-        return v_select(denom == v_zero, v_zero, res);
+        const Tvec v_zero = vx_setall<typename VTraits<Tvec>::lane_type>(0);
+        return v_select(v_eq(denom, v_zero), v_zero, res);
     }
 #endif
     static inline T1 r(T1 denom, const T2* scalar)
@@ -1853,11 +1854,11 @@ struct op_recip
 template<>
 struct op_recip<float, float, v_float32>
 {
-#if CV_SIMD
+#if CV_SIMD || CV_SIMD_SCALABLE
     static inline v_float32 r(const v_float32& a, const float* scalar)
     {
         const v_float32 v_scalar = vx_setall_f32(*scalar);
-        return v_scalar / a;
+        return v_div(v_scalar, a);
     }
 #endif
     static inline float r(float denom, const float* scalar)
@@ -1871,7 +1872,7 @@ struct op_recip<double, double, v_float64>
     static inline v_float64 r(const v_float64& a, const double* scalar)
     {
         const v_float64 v_scalar = vx_setall_f64(*scalar);
-        return v_scalar / a;
+        return v_div(v_scalar, a);
     }
 #endif
     static inline double r(double denom, const double* scalar)