@ -219,7 +219,7 @@ template<typename T1, typename Tvec>
// Addition functor with one overload per operand kind.
// Only the function-style intrinsic form of the vector overload is kept;
// the superseded operator form (a + b) was left next to it and does not parse.
struct op_add
{
    // Vector operands: delegate to the v_add intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_add(a, b); }
    // Scalar operands: delegate to the c_add helper.
    static inline T1 r(T1 a, T1 b)
    { return c_add(a, b); }
};
@ -229,7 +229,7 @@ template<typename T1, typename Tvec>
// Subtraction functor with one overload per operand kind.
// Only the function-style intrinsic form of the vector overload is kept;
// the superseded operator form (a - b) was left next to it and does not parse.
struct op_sub
{
    // Vector operands: delegate to the v_sub intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_sub(a, b); }
    // Scalar operands: delegate to the c_sub helper.
    static inline T1 r(T1 a, T1 b)
    { return c_sub(a, b); }
};
@ -266,7 +266,7 @@ struct op_absdiff
template < >
struct op_absdiff < schar , v_int8 >
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
static inline v_int8 r ( const v_int8 & a , const v_int8 & b )
{ return v_absdiffs ( a , b ) ; }
# endif
@ -276,7 +276,7 @@ struct op_absdiff<schar, v_int8>
template < >
struct op_absdiff < short , v_int16 >
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
static inline v_int16 r ( const v_int16 & a , const v_int16 & b )
{ return v_absdiffs ( a , b ) ; }
# endif
@ -286,7 +286,7 @@ struct op_absdiff<short, v_int16>
template < >
struct op_absdiff < int , v_int32 >
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
static inline v_int32 r ( const v_int32 & a , const v_int32 & b )
{ return v_reinterpret_as_s32 ( v_absdiff ( a , b ) ) ; }
# endif
@ -299,7 +299,7 @@ template<typename T1, typename Tvec>
// Bitwise OR functor with one overload per operand kind.
// The vector overload uses the function-style intrinsic; the stale
// operator form (a | b on vectors) that sat beside it has been removed.
struct op_or
{
    // Vector operands: delegate to the v_or intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_or(a, b); }
    // Scalar operands: built-in bitwise OR.
    static inline T1 r(T1 a, T1 b)
    { return a | b; }
};
@ -307,7 +307,7 @@ template<typename T1, typename Tvec>
// Bitwise XOR functor with one overload per operand kind.
// The vector overload uses the function-style intrinsic; the stale
// operator form (a ^ b on vectors) that sat beside it has been removed.
struct op_xor
{
    // Vector operands: delegate to the v_xor intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_xor(a, b); }
    // Scalar operands: built-in bitwise XOR.
    static inline T1 r(T1 a, T1 b)
    { return a ^ b; }
};
@ -315,7 +315,7 @@ template<typename T1, typename Tvec>
// Bitwise AND functor with one overload per operand kind.
// The vector overload uses the function-style intrinsic; the stale
// operator form (a & b on vectors) that sat beside it has been removed.
struct op_and
{
    // Vector operands: delegate to the v_and intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_and(a, b); }
    // Scalar operands: built-in bitwise AND.
    static inline T1 r(T1 a, T1 b)
    { return a & b; }
};
@ -324,14 +324,14 @@ struct op_not
{
// Vector overload takes a single operand; per the original note, the second
// operand (b) is already dropped at the loader level.
// Uses the v_not intrinsic; the stale operator form (~a) has been removed.
static inline Tvec r(const Tvec& a)
{ return v_not(a); }
// Scalar overload: returns the bitwise complement of a.
// The unnamed second parameter is accepted but never read.
static inline T1 r ( T1 a , T1 )
{ return ~ a ; }
} ;
//////////////////////////// Loaders /////////////////////////////////
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
template < template < typename T1 , typename Tvec > class OP , typename T1 , typename Tvec >
struct bin_loader
@ -396,13 +396,13 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
static void bin_loop ( const T1 * src1 , size_t step1 , const T1 * src2 , size_t step2 , T1 * dst , size_t step , int width , int height )
{
typedef OP < T1 , Tvec > op ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
typedef bin_loader < OP , T1 , Tvec > ldr ;
enum { wide_step = Tvec : : nlanes } ;
const int wide_step = VTraits < Tvec > : : vlanes ( ) ;
# if !CV_NEON && CV_SIMD_WIDTH == 16
enum { wide_step_l = wide_step * 2 } ;
const int wide_step_l = wide_step * 2 ;
# else
enum { wide_step_l = wide_step } ;
const int wide_step_l = wide_step ;
# endif
# endif // CV_SIMD
@ -414,7 +414,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
{
int x = 0 ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
# if !CV_NEON && !CV_MSA
if ( is_aligned ( src1 , src2 , dst ) )
{
@ -587,7 +587,7 @@ template<typename T1, typename Tvec>
// "Less than" comparison functor.
// The vector overload uses the v_lt intrinsic; the stale operator form
// (a < b on vectors) that sat beside it has been removed.
struct op_cmplt
{
    // Vector operands: per-lane comparison via v_lt.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_lt(a, b); }
    // Scalar operands: (uchar)-1 when a < b, otherwise 0.
    static inline uchar r(T1 a, T1 b)
    { return (uchar)-(int)(a < b); }
};
@ -596,7 +596,7 @@ template<typename T1, typename Tvec>
// "Less than or equal" comparison functor.
// The vector overload uses the v_le intrinsic; the stale operator form
// that sat beside it has been removed and the split "<=" token is rejoined.
struct op_cmple
{
    // Vector operands: per-lane comparison via v_le.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_le(a, b); }
    // Scalar operands: (uchar)-1 when a <= b, otherwise 0.
    static inline uchar r(T1 a, T1 b)
    { return (uchar)-(int)(a <= b); }
};
@ -605,7 +605,7 @@ template<typename T1, typename Tvec>
// Equality comparison functor.
// The vector overload uses the v_eq intrinsic; the stale operator form
// that sat beside it has been removed and the split "==" token is rejoined.
struct op_cmpeq
{
    // Vector operands: per-lane comparison via v_eq.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_eq(a, b); }
    // Scalar operands: (uchar)-1 when a == b, otherwise 0.
    static inline uchar r(T1 a, T1 b)
    { return (uchar)-(int)(a == b); }
};
@ -614,14 +614,14 @@ template<typename T1, typename Tvec>
// Inequality comparison functor.
// The vector overload uses the v_ne intrinsic; the stale operator form
// that sat beside it has been removed and the split "!=" token is rejoined.
struct op_cmpne
{
    // Vector operands: per-lane comparison via v_ne.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_ne(a, b); }
    // Scalar operands: (uchar)-1 when a != b, otherwise 0.
    static inline uchar r(T1 a, T1 b)
    { return (uchar)-(int)(a != b); }
};
//////////////////////////// Loaders /////////////////////////////////
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// todo: add support for RW alignment & stream
template < int nload , template < typename T1 , typename Tvec > class OP , typename T1 , typename Tvec >
struct cmp_loader_n
@ -646,10 +646,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
struct cmp_loader_n < sizeof ( ushort ) , OP , T1 , Tvec >
{
typedef OP < T1 , Tvec > op ;
enum { step = Tvec : : nlanes } ;
static inline void l ( const T1 * src1 , const T1 * src2 , uchar * dst )
{
const int step = VTraits < Tvec > : : vlanes ( ) ;
Tvec c0 = op : : r ( vx_load ( src1 ) , vx_load ( src2 ) ) ;
Tvec c1 = op : : r ( vx_load ( src1 + step ) , vx_load ( src2 + step ) ) ;
v_store ( dst , v_pack_b ( v_reinterpret_as_u16 ( c0 ) , v_reinterpret_as_u16 ( c1 ) ) ) ;
@ -660,10 +660,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
struct cmp_loader_n < sizeof ( unsigned ) , OP , T1 , Tvec >
{
typedef OP < T1 , Tvec > op ;
enum { step = Tvec : : nlanes } ;
static inline void l ( const T1 * src1 , const T1 * src2 , uchar * dst )
{
const int step = VTraits < Tvec > : : vlanes ( ) ;
v_uint32 c0 = v_reinterpret_as_u32 ( op : : r ( vx_load ( src1 ) , vx_load ( src2 ) ) ) ;
v_uint32 c1 = v_reinterpret_as_u32 ( op : : r ( vx_load ( src1 + step ) , vx_load ( src2 + step ) ) ) ;
v_uint32 c2 = v_reinterpret_as_u32 ( op : : r ( vx_load ( src1 + step * 2 ) , vx_load ( src2 + step * 2 ) ) ) ;
@ -676,10 +676,10 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
struct cmp_loader_n < sizeof ( double ) , OP , T1 , Tvec >
{
typedef OP < T1 , Tvec > op ;
enum { step = Tvec : : nlanes } ;
static inline void l ( const T1 * src1 , const T1 * src2 , uchar * dst )
{
const int step = VTraits < Tvec > : : vlanes ( ) ;
v_uint64 c0 = v_reinterpret_as_u64 ( op : : r ( vx_load ( src1 ) , vx_load ( src2 ) ) ) ;
v_uint64 c1 = v_reinterpret_as_u64 ( op : : r ( vx_load ( src1 + step ) , vx_load ( src2 + step ) ) ) ;
v_uint64 c2 = v_reinterpret_as_u64 ( op : : r ( vx_load ( src1 + step * 2 ) , vx_load ( src2 + step * 2 ) ) ) ;
@ -701,9 +701,9 @@ template<template<typename T1, typename Tvec> class OP, typename T1, typename Tv
static void cmp_loop ( const T1 * src1 , size_t step1 , const T1 * src2 , size_t step2 , uchar * dst , size_t step , int width , int height )
{
typedef OP < T1 , Tvec > op ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
typedef cmp_loader_n < sizeof ( T1 ) , OP , T1 , Tvec > ldr ;
enum { wide_step = Tvec : : nlanes * sizeof ( T1 ) } ;
const int wide_step = VTraits < Tvec > : : vlanes ( ) * sizeof ( T1 ) ;
# endif // CV_SIMD
step1 / = sizeof ( T1 ) ;
@ -713,7 +713,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2,
{
int x = 0 ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
for ( ; x < = width - wide_step ; x + = wide_step )
{
ldr : : l ( src1 + x , src2 + x , dst + x ) ;
@ -880,7 +880,7 @@ DEFINE_SIMD_ALL(cmp)
//////////////////////////// Loaders ///////////////////////////////
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// todo: add support for RW alignment & stream
template < int nload , template < typename T1 , typename T2 , typename Tvec > class OP , typename T1 , typename T2 , typename Tvec >
struct scalar_loader_n
@ -1013,10 +1013,10 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T2
struct scalar_loader_n < sizeof ( int ) , OP , int , T2 , v_int32 >
{
typedef OP < int , T2 , v_int32 > op ;
enum { step = v_int32 : : nlanes } ;
static inline void l ( const int * src1 , const int * src2 , const T2 * scalar , int * dst )
{
const int step = VTraits < v_int32 > : : vlanes ( ) ;
v_int32 v_src1 = vx_load ( src1 ) ;
v_int32 v_src2 = vx_load ( src2 ) ;
v_int32 v_src1s = vx_load ( src1 + step ) ;
@ -1043,6 +1043,7 @@ struct scalar_loader_n<sizeof(int), OP, int, T2, v_int32>
static inline void l ( const int * src1 , const T2 * scalar , int * dst )
{
const int step = VTraits < v_int32 > : : vlanes ( ) ;
v_int32 v_src1 = vx_load ( src1 ) ;
v_int32 v_src1s = vx_load ( src1 + step ) ;
@ -1068,10 +1069,9 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T2
struct scalar_loader_n < sizeof ( float ) , OP , float , T2 , v_float32 >
{
typedef OP < float , T2 , v_float32 > op ;
enum { step = v_float32 : : nlanes } ;
static inline void l ( const float * src1 , const float * src2 , const T2 * scalar , float * dst )
{
const int step = VTraits < v_float32 > : : vlanes ( ) ;
v_float32 v_src1 = vx_load ( src1 ) ;
v_float32 v_src2 = vx_load ( src2 ) ;
v_float32 v_src1s = vx_load ( src1 + step ) ;
@ -1086,6 +1086,7 @@ struct scalar_loader_n<sizeof(float), OP, float, T2, v_float32>
static inline void l ( const float * src1 , const T2 * scalar , float * dst )
{
const int step = VTraits < v_float32 > : : vlanes ( ) ;
v_float32 v_src1 = vx_load ( src1 ) ;
v_float32 v_src1s = vx_load ( src1 + step ) ;
@ -1262,10 +1263,10 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
T1 * dst , size_t step , int width , int height , const T2 * scalar )
{
typedef OP < T1 , T2 , Tvec > op ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
typedef scalar_loader_n < sizeof ( T1 ) , OP , T1 , T2 , Tvec > ldr ;
const int wide_step = sizeof ( T1 ) > sizeof ( ushort ) ? Tvec : : nlanes * 2 :
sizeof ( T1 ) = = sizeof ( uchar ) ? Tvec : : nlanes / 2 : Tvec : : nlanes ;
const int wide_step = sizeof ( T1 ) > sizeof ( ushort ) ? VTraits < Tvec > : : vlanes ( ) * 2 :
sizeof ( T1 ) = = sizeof ( uchar ) ? VTraits < Tvec > : : vlanes ( ) / 2 : VTraits < Tvec > : : vlanes ( ) ;
# endif // CV_SIMD
step1 / = sizeof ( T1 ) ;
@ -1276,7 +1277,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste
{
int x = 0 ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
for ( ; x < = width - wide_step ; x + = wide_step )
{
ldr : : l ( src1 + x , src2 + x , scalar , dst + x ) ;
@ -1308,10 +1309,10 @@ template<template<typename T1, typename T2, typename Tvec> class OP, typename T1
static void scalar_loop ( const T1 * src1 , size_t step1 , T1 * dst , size_t step , int width , int height , const T2 * scalar )
{
typedef OP < T1 , T2 , Tvec > op ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
typedef scalar_loader_n < sizeof ( T1 ) , OP , T1 , T2 , Tvec > ldr ;
const int wide_step = sizeof ( T1 ) > sizeof ( ushort ) ? Tvec : : nlanes * 2 :
sizeof ( T1 ) = = sizeof ( uchar ) ? Tvec : : nlanes / 2 : Tvec : : nlanes ;
const int wide_step = sizeof ( T1 ) > sizeof ( ushort ) ? VTraits < Tvec > : : vlanes ( ) * 2 :
sizeof ( T1 ) = = sizeof ( uchar ) ? VTraits < Tvec > : : vlanes ( ) / 2 : VTraits < Tvec > : : vlanes ( ) ;
# endif // CV_SIMD
step1 / = sizeof ( T1 ) ;
@ -1321,7 +1322,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int
{
int x = 0 ;
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
for ( ; x < = width - wide_step ; x + = wide_step )
{
ldr : : l ( src1 + x , scalar , dst + x ) ;
@ -1428,7 +1429,7 @@ template<typename T1, typename Tvec>
// Multiplication functor with one overload per operand kind.
// The vector overload uses the v_mul intrinsic; the stale operator form
// (a * b on vectors) that sat beside it has been removed.
struct op_mul
{
    // Vector operands: delegate to the v_mul intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_mul(a, b); }
    // Scalar operands: multiply, then convert through saturate_cast<T1>.
    static inline T1 r(T1 a, T1 b)
    { return saturate_cast<T1>(a * b); }
};
@ -1436,11 +1437,11 @@ struct op_mul
template < typename T1 , typename T2 , typename Tvec >
struct op_mul_scale
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// Vector path: computes (*scalar) * a * b on float32 lanes.
// Only one return statement is kept; the superseded operator expression
// (v_scalar * a * b) had been left in front of it.
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
{
    // Vector built from *scalar via vx_setall_f32.
    const v_float32 v_scalar = vx_setall_f32(*scalar);
    // NOTE(review): relies on a three-argument v_mul overload being available.
    return v_mul(v_scalar, a, b);
}
# endif
static inline T1 r ( T1 a , T1 b , const T2 * scalar )
@ -1456,7 +1457,7 @@ struct op_mul_scale<double, double, v_float64>
// Vector path: computes ((*scalar) * a) * b on float64 lanes.
// Only one return statement is kept; the superseded operator expression
// (v_scalar * a * b) had been left in front of it.
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
{
    // Vector built from *scalar via vx_setall_f64.
    const v_float64 v_scalar = vx_setall_f64(*scalar);
    // Nested two-argument calls keep the left-to-right grouping of the old expression.
    return v_mul(v_mul(v_scalar, a), b);
}
# endif
static inline double r ( double a , double b , const double * scalar )
@ -1569,7 +1570,7 @@ template<typename T1, typename Tvec>
// Division functor with one overload per operand kind.
// The vector overload uses the v_div intrinsic; the stale operator form
// (a / b on vectors) that sat beside it has been removed.
struct op_div_f
{
    // Vector operands: delegate to the v_div intrinsic.
    static inline Tvec r(const Tvec& a, const Tvec& b)
    { return v_div(a, b); }
    // Scalar operands: built-in division, with no zero check in this functor.
    static inline T1 r(T1 a, T1 b)
    { return a / b; }
};
@ -1577,16 +1578,16 @@ struct op_div_f
template < typename T1 , typename T2 , typename Tvec >
struct op_div_scale
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// Vector path: computes (a * (*scalar)) / b on float32 lanes.
// Only one return statement is kept; the superseded operator expression
// (a * v_scalar / b) had been left in front of it.
static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar)
{
    // Vector built from *scalar via vx_setall_f32.
    const v_float32 v_scalar = vx_setall_f32(*scalar);
    // Multiply first, then divide, matching the grouping of the old expression.
    return v_div(v_mul(a, v_scalar), b);
}
// Combines a denominator vector with an already computed result.
// Only the VTraits/v_eq form is kept; the superseded Tvec::lane_type and
// operator== lines declared v_zero a second time and added a second return.
static inline Tvec pre(const Tvec& denom, const Tvec& res)
{
    // All-zero vector of the lane type reported by VTraits<Tvec>.
    const Tvec v_zero = vx_setall<typename VTraits<Tvec>::lane_type>(0);
    // Presumably yields zero where denom == 0 and res elsewhere; confirm
    // against the v_select argument order.
    return v_select(v_eq(denom, v_zero), v_zero, res);
}
# endif
static inline T1 r ( T1 a , T1 denom , const T2 * scalar )
@ -1599,11 +1600,11 @@ struct op_div_scale
template < >
struct op_div_scale < float , float , v_float32 >
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// Vector path of the float specialisation: computes (a * (*scalar)) / b.
// Only one return statement is kept; the superseded operator expression
// (a * v_scalar / b) had been left in front of it.
static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar)
{
    // Vector built from *scalar via vx_setall_f32.
    const v_float32 v_scalar = vx_setall_f32(*scalar);
    // Multiply first, then divide, matching the grouping of the old expression.
    return v_div(v_mul(a, v_scalar), b);
}
# endif
static inline float r ( float a , float denom , const float * scalar )
@ -1617,7 +1618,7 @@ struct op_div_scale<double, double, v_float64>
// Vector path of the double specialisation: computes (a * (*scalar)) / b.
// Only one return statement is kept; the superseded operator expression
// (a * v_scalar / b) had been left in front of it.
static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar)
{
    // Vector built from *scalar via vx_setall_f64.
    const v_float64 v_scalar = vx_setall_f64(*scalar);
    // Multiply first, then divide, matching the grouping of the old expression.
    return v_div(v_mul(a, v_scalar), b);
}
# endif
static inline double r ( double a , double denom , const double * scalar )
@ -1685,7 +1686,7 @@ DEFINE_SIMD_ALL(div, div_loop)
template < typename T1 , typename T2 , typename Tvec >
struct op_add_scale
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
static inline v_float32 r ( const v_float32 & a , const v_float32 & b , const T2 * scalar )
{
const v_float32 v_alpha = vx_setall_f32 ( * scalar ) ;
@ -1718,7 +1719,7 @@ struct op_add_scale<double, double, v_float64>
template < typename T1 , typename T2 , typename Tvec >
struct op_add_weighted
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
static inline v_float32 r ( const v_float32 & a , const v_float32 & b , const T2 * scalars )
{
const v_float32 v_alpha = vx_setall_f32 ( scalars [ 0 ] ) ;
@ -1835,16 +1836,16 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d)
template < typename T1 , typename T2 , typename Tvec >
struct op_recip
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// Vector path: computes (*scalar) / a on float32 lanes.
// Only one return statement is kept; the superseded operator expression
// (v_scalar / a) had been left in front of it.
static inline v_float32 r(const v_float32& a, const T2* scalar)
{
    // Vector built from *scalar via vx_setall_f32.
    const v_float32 v_scalar = vx_setall_f32(*scalar);
    return v_div(v_scalar, a);
}
// Combines a denominator vector with an already computed result.
// Only the VTraits/v_eq form is kept; the superseded Tvec::lane_type and
// operator== lines declared v_zero a second time and added a second return.
static inline Tvec pre(const Tvec& denom, const Tvec& res)
{
    // All-zero vector of the lane type reported by VTraits<Tvec>.
    const Tvec v_zero = vx_setall<typename VTraits<Tvec>::lane_type>(0);
    // Presumably yields zero where denom == 0 and res elsewhere; confirm
    // against the v_select argument order.
    return v_select(v_eq(denom, v_zero), v_zero, res);
}
# endif
static inline T1 r ( T1 denom , const T2 * scalar )
@ -1857,11 +1858,11 @@ struct op_recip
template < >
struct op_recip < float , float , v_float32 >
{
# if CV_SIMD
# if CV_SIMD || CV_SIMD_SCALABLE
// Vector path of the float specialisation: computes (*scalar) / a.
// Only one return statement is kept; the superseded operator expression
// (v_scalar / a) had been left in front of it.
static inline v_float32 r(const v_float32& a, const float* scalar)
{
    // Vector built from *scalar via vx_setall_f32.
    const v_float32 v_scalar = vx_setall_f32(*scalar);
    return v_div(v_scalar, a);
}
# endif
static inline float r ( float denom , const float * scalar )
@ -1875,7 +1876,7 @@ struct op_recip<double, double, v_float64>
// Vector path of the double specialisation: computes (*scalar) / a.
// Only one return statement is kept; the superseded operator expression
// (v_scalar / a) had been left in front of it.
static inline v_float64 r(const v_float64& a, const double* scalar)
{
    // Vector built from *scalar via vx_setall_f64.
    const v_float64 v_scalar = vx_setall_f64(*scalar);
    return v_div(v_scalar, a);
}
# endif
static inline double r ( double denom , const double * scalar )