@ -16,8 +16,10 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Forward declarations of the element-wise helpers for this optimization namespace.
// The cvt* functions take a source array, a destination array and an element count `len`.
// (cvt16f32f / cvt16bf32f have no definition in this chunk; presumably half -> float.)
void cvt16f32f ( const float16_t * src , float * dst , int len ) ;
void cvt32f16f ( const float * src , float16_t * dst , int len ) ;
// NOTE(review): the two three-argument addRNGBias* declarations below have no complete
// definition visible in this chunk (the definitions seen here take an extra `cn`
// argument) - confirm whether they are still required.
void addRNGBias32f ( float * arr , const float * scaleBiasPairs , int len ) ;
void addRNGBias64f ( double * arr , const double * scaleBiasPairs , int len ) ;
void cvt16bf32f ( const bfloat16_t * src , float * dst , int len ) ;
void cvt32f16bf ( const float * src , bfloat16_t * dst , int len ) ;
// Channel-aware variants: `len` is multiplied by `cn` inside the definitions when cn != 1.
void addRNGBias32f ( float * arr , const float * scaleBiasPairs , int len , int cn ) ;
void addRNGBias64f ( double * arr , const double * scaleBiasPairs , int len , int cn ) ;
CV_CPU_OPTIMIZATION_NAMESPACE_END
} // namespace cv::hal
@ -77,20 +79,63 @@ void cvt32f16f( const float* src, float16_t* dst, int len )
dst [ j ] = float16_t ( src [ j ] ) ;
}
void addRNGBias32f ( float * arr , const float * scaleBiasPairs , int len )
void cvt32f16bf ( const float * src , bfloat16_t * dst , int len )
{
CV_INSTRUMENT_REGION ( ) ;
// the loop is simple enough, so we let the compiler to vectorize it
for ( int i = 0 ; i < len ; i + + )
arr [ i ] + = scaleBiasPairs [ i * 2 + 1 ] ;
int j = 0 ;
# if CV_SIMD
const int VECSZ = v_float32 : : nlanes ;
for ( ; j < len ; j + = VECSZ )
{
if ( j > len - VECSZ )
{
if ( j = = 0 )
break ;
j = len - VECSZ ;
}
v_pack_store ( dst + j , vx_load ( src + j ) ) ;
}
# endif
for ( ; j < len ; j + + )
dst [ j ] = bfloat16_t ( src [ j ] ) ;
}
void addRNGBias64f ( double * arr , const double * scaleBiasPairs , int len )
void addRNGBias32f ( float * arr , const float * scaleBiasPairs , int len , int c n )
{
CV_INSTRUMENT_REGION ( ) ;
// the loop is simple enough, so we let the compiler to vectorize it
for ( int i = 0 ; i < len ; i + + )
arr [ i ] + = scaleBiasPairs [ i * 2 + 1 ] ;
if ( cn = = 1 ) {
float bias = scaleBiasPairs [ 1 ] ;
for ( int i = 0 ; i < len ; i + + ) {
arr [ i ] + = bias ;
}
} else {
int k = 0 ;
len * = cn ;
cn - - ;
for ( int i = 0 ; i < len ; i + + ) {
arr [ i ] + = scaleBiasPairs [ k * 2 + 1 ] ;
k = ( k + 1 ) & ( ( k > = cn ) - 1 ) ;
}
}
}
// Double-precision bias addition over `len` groups of `cn` values.
// Only the odd entries of scaleBiasPairs (index 2*channel + 1) are read;
// the channel index cycles 0..cn-1 over consecutive elements of `arr`.
void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len, int cn )
{
    CV_INSTRUMENT_REGION();

    if (cn == 1)
    {
        // single channel: the same bias goes to every element
        const double bias = scaleBiasPairs[1];
        for (int idx = 0; idx < len; ++idx)
            arr[idx] += bias;
        return;
    }

    const int total = len * cn;
    const int lastChannel = cn - 1;
    int channel = 0;
    for (int idx = 0; idx < total; ++idx)
    {
        arr[idx] += scaleBiasPairs[2*channel + 1];
        // advance to the next channel, wrapping to 0 after the last one
        channel = (channel >= lastChannel) ? 0 : channel + 1;
    }
}
CV_CPU_OPTIMIZATION_NAMESPACE_END
@ -128,6 +173,35 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
}
}
// Row-by-row conversion of a 2D array from _Ts to _Td that goes through
// v_float64 vector pairs when CV_SIMD_64F is enabled.
//   src, sstep - source data and its row step in bytes
//   dst, dstep - destination data and its row step in bytes
//   size       - width (elements per row) and height (rows)
// The third template parameter is not used by the body.
template<typename _Ts, typename _Td, typename dummy> static inline void
cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
{
    // steps arrive in bytes; convert them to element counts
    const size_t srcStride = sstep / sizeof(src[0]);
    const size_t dstStride = dstep / sizeof(dst[0]);

    for( int row = 0; row < size.height; ++row, src += srcStride, dst += dstStride )
    {
        int x = 0;
#if CV_SIMD_64F
        const int VECSZ = v_float64::nlanes*2;
        while( x < size.width )
        {
            if( x > size.width - VECSZ )
            {
                // Not enough elements for a full step. Fall back to the
                // scalar loop if nothing was vectorized yet or if the
                // operation is in-place (re-reading already converted
                // elements would be wrong); otherwise overlap the last step
                // with the previous one so it ends exactly at the row end.
                if( x == 0 || src == (_Ts*)dst )
                    break;
                x = size.width - VECSZ;
            }
            v_float64 lo, hi;
            vx_load_pair_as(src + x, lo, hi);
            v_store_pair_as(dst + x, lo, hi);
            x += VECSZ;
        }
#endif
        // scalar tail
        for( ; x < size.width; ++x )
            dst[x] = saturate_cast<_Td>(src[x]);
    }
}
// in order to reduce the code size, for (16f <-> ...) conversions
// we add a conversion function without loop unrolling
template < typename _Ts , typename _Td , typename _Twvec > static inline void
@ -180,25 +254,102 @@ static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
cvtfunc < _Ts , _Td , _Twvec > ( src , sstep , dst , dstep , size ) ; \
}
// Defines `cvt<suffix>`: converts a 2D array of _Ts into bytes holding 0 or 1.
//   suffix - appended to "cvt" to form the function name
//   _Ts    - integer type used to read the source elements (floating-point
//            sources are instantiated with a same-sized integer type)
//   shift  - number of high bits to drop before the non-zero test (0 or 1 in
//            the instantiations of this file; 1 discards the top bit, i.e.
//            the sign bit of a floating-point pattern)
// The shift is done on a 64-bit unsigned copy and the result is masked back
// to the width of _Ts. Shifting `src[j]` directly would operate on the
// integer-promoted value: for 16-bit sources the top bit would survive in the
// upper half of the `int` (so a 0x8000 pattern would count as non-zero), and
// left-shifting a negative value is undefined before C++20.
#define DEF_CVT2BOOL_FUNC(suffix, _Ts, shift) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    /* low sizeof(_Ts)*8 bits set */ \
    const uint64_t mask = ~(uint64_t)0 >> (64 - 8*sizeof(_Ts)); \
    sstep /= sizeof(src[0]); \
    \
    for (int i = 0; i < size.height; i++, src += sstep, dst += dstep) { \
        for (int j = 0; j < size.width; j++) \
            dst[j] = ((((uint64_t)src[j]) << (shift)) & mask) != 0; \
    } \
}
// Defines `cvt<suffix>`: expands a 2D byte array into _Td elements, writing
// `scale` (cast to _Td) for every non-zero source byte and 0 otherwise.
//   suffix - appended to "cvt" to form the function name
//   _Td    - destination element type
//   scale  - value stored for non-zero inputs
#define DEF_CVTBOOL2_FUNC(suffix, _Td, scale) \
static void cvt##suffix(const uchar* src, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    _Td* dst = (_Td*)dst_; \
    /* destination step arrives in bytes; the source is already byte-sized */ \
    const size_t dstStride = dstep/sizeof(dst[0]); \
    \
    for (int y = 0; y < size.height; ++y) { \
        for (int x = 0; x < size.width; ++x) { \
            const bool nonzero = src[x] != 0; \
            dst[x] = (_Td)(nonzero * scale); \
        } \
        src += sstep; \
        dst += dstStride; \
    } \
}
// Defines `cvt<suffix>`: plain (non-vectorized) element-wise conversion of a
// 2D array from _Ts to _Td using saturate_cast<_Td>.
//   suffix - appended to "cvt" to form the function name
//   _Ts    - source element type
//   _Td    - destination element type
#define DEF_CVT_SCALAR_FUNC(suffix, _Ts, _Td) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    _Td* dst = (_Td*)dst_; \
    /* steps arrive in bytes; convert them to element counts */ \
    const size_t srcStride = sstep/sizeof(src[0]); \
    const size_t dstStride = dstep/sizeof(dst[0]); \
    \
    for (int y = 0; y < size.height; ++y) { \
        for (int x = 0; x < size.width; ++x) \
            dst[x] = saturate_cast<_Td>(src[x]); \
        src += srcStride; \
        dst += dstStride; \
    } \
}
// Defines `cvt<suffix>`: plain element-wise conversion from _Ts to _Td where
// each value is first cast to _Tw and clamped from below at 0, then passed
// through saturate_cast<_Td>.
//   suffix - appended to "cvt" to form the function name
//   _Ts    - source element type
//   _Td    - destination element type
//   _Tw    - intermediate type in which the max(value, 0) is computed
#define DEF_CVT_SCALAR_FUNC_S2U(suffix, _Ts, _Td, _Tw) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    _Td* dst = (_Td*)dst_; \
    /* steps arrive in bytes; convert them to element counts */ \
    const size_t srcStride = sstep/sizeof(src[0]); \
    const size_t dstStride = dstep/sizeof(dst[0]); \
    \
    for (int y = 0; y < size.height; ++y) { \
        for (int x = 0; x < size.width; ++x) { \
            const _Tw widened = (_Tw)src[x]; \
            dst[x] = saturate_cast<_Td>(std::max(widened, (_Tw)0)); \
        } \
        src += srcStride; \
        dst += dstStride; \
    } \
}
////////////////////// 8u -> ... ////////////////////////
// Converter instantiations for 8-bit unsigned, 8-bit signed and boolean sources.
// Each DEF_CVT_FUNC line defines a static cvt<suffix> function that forwards to the
// named kernel template; the arguments appear to be
// (suffix, kernel, source type, destination type, work vector type).
DEF_CVT_FUNC ( 8u 8 s , cvt_ , uchar , schar , v_int16 )
DEF_CVT_FUNC ( 8u 16u , cvt_ , uchar , ushort , v_uint16 )
DEF_CVT_FUNC ( 8u 16 s , cvt_ , uchar , short , v_int16 )
DEF_CVT_FUNC ( 8u 32 s , cvt_ , uchar , int , v_int32 )
DEF_CVT_FUNC ( 8u 32f , cvt_ , uchar , float , v_float32 )
DEF_CVT_FUNC ( 8u 64f , cvt_ , uchar , double , v_int32 )
// scalar-only conversion (no SIMD kernel is used for this pair)
DEF_CVT_SCALAR_FUNC ( 8u 64 s , uchar , int64_t )
DEF_CVT_FUNC ( 8u 16f , cvt1_ , uchar , float16_t , v_float32 )
DEF_CVT_FUNC ( 8u 16 bf , cvt1_ , uchar , bfloat16_t , v_float32 )
// integer source: shift 0, i.e. a plain "!= 0" test
DEF_CVT2BOOL_FUNC ( 8u 8 b , uchar , 0 )
////////////////////// 8s -> ... ////////////////////////
DEF_CVT_FUNC ( 8 s8u , cvt_ , schar , uchar , v_int16 )
DEF_CVT_FUNC ( 8 s16u , cvt_ , schar , ushort , v_uint16 )
DEF_CVT_FUNC ( 8 s16s , cvt_ , schar , short , v_int16 )
DEF_CVT_FUNC ( 8 s32u , cvt_ , schar , unsigned , v_uint32 )
DEF_CVT_FUNC ( 8 s32s , cvt_ , schar , int , v_int32 )
DEF_CVT_FUNC ( 8 s32f , cvt_ , schar , float , v_float32 )
DEF_CVT_FUNC ( 8 s64f , cvt_ , schar , double , v_int32 )
DEF_CVT_FUNC ( 8 s64u , cvt_ , schar , uint64_t , v_uint32 )
DEF_CVT_FUNC ( 8 s64s , cvt_ , schar , int64_t , v_int32 )
DEF_CVT_FUNC ( 8 s16f , cvt1_ , schar , float16_t , v_float32 )
DEF_CVT_FUNC ( 8 s16bf , cvt1_ , schar , bfloat16_t , v_float32 )
////////////////////// 8b -> ... ////////////////////////
// bool -> X: the last argument is the value written for a non-zero source byte.
DEF_CVTBOOL2_FUNC ( 8 b8u , uchar , 1 )
DEF_CVTBOOL2_FUNC ( 8 b16s , short , 1 )
DEF_CVTBOOL2_FUNC ( 8 b32s , int , 1 )
DEF_CVTBOOL2_FUNC ( 8 b32f , float , 1 )
DEF_CVTBOOL2_FUNC ( 8 b64f , double , 1 )
DEF_CVTBOOL2_FUNC ( 8 b64s , int64_t , 1 )
// half-precision destinations are written as raw 16-bit patterns
DEF_CVTBOOL2_FUNC ( 8 b16f , uint16_t , 0x3c00 ) // float16_t(1.0f)
DEF_CVTBOOL2_FUNC ( 8 b16bf , uint16_t , 0x3f80 ) // bfloat16_t(1.0f)
////////////////////// 16u -> ... ////////////////////////
@ -208,17 +359,37 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32)
// Remaining 16-bit unsigned source converters, followed by the 16-bit signed ones.
DEF_CVT_FUNC ( 16u 32 s , cvt_ , ushort , int , v_int32 )
DEF_CVT_FUNC ( 16u 32f , cvt_ , ushort , float , v_float32 )
DEF_CVT_FUNC ( 16u 64f , cvt_ , ushort , double , v_int32 )
// scalar-only conversion (no SIMD kernel is used for this pair)
DEF_CVT_SCALAR_FUNC ( 16u 64 s , ushort , int64_t )
DEF_CVT_FUNC ( 16u 16f , cvt1_ , ushort , float16_t , v_float32 )
DEF_CVT_FUNC ( 16u 16 bf , cvt1_ , ushort , bfloat16_t , v_float32 )
////////////////////// 16s -> ... ////////////////////////
DEF_CVT_FUNC ( 16 s8u , cvt_ , short , uchar , v_int16 )
DEF_CVT_FUNC ( 16 s8s , cvt_ , short , schar , v_int16 )
DEF_CVT_FUNC ( 16 s16u , cvt_ , short , ushort , v_int32 )
DEF_CVT_FUNC ( 16 s32u , cvt_ , short , unsigned , v_uint32 )
DEF_CVT_FUNC ( 16 s32s , cvt_ , short , int , v_int32 )
DEF_CVT_FUNC ( 16 s32f , cvt_ , short , float , v_float32 )
DEF_CVT_FUNC ( 16 s64f , cvt_ , short , double , v_int32 )
DEF_CVT_FUNC ( 16 s64u , cvt_ , short , uint64_t , v_uint32 )
DEF_CVT_FUNC ( 16 s64s , cvt_ , short , int64_t , v_int32 )
DEF_CVT_FUNC ( 16 s16f , cvt1_ , short , float16_t , v_float32 )
DEF_CVT_FUNC ( 16 s16bf , cvt1_ , short , bfloat16_t , v_float32 )
// integer source: shift 0, i.e. a plain "!= 0" test
DEF_CVT2BOOL_FUNC ( 16 s8b , short , 0 )
////////////////////// 32u -> ... ////////////////////////

// Converters from 32-bit unsigned sources. Every kernel here must read its
// input as `unsigned`: reading it as `int` would turn values >= 2^31 into
// negative numbers before the conversion.
DEF_CVT_FUNC(32u8u, cvt_, unsigned, uchar, v_uint32)
DEF_CVT_FUNC(32u8s, cvt_, unsigned, schar, v_int32)
DEF_CVT_FUNC(32u16u, cvt_, unsigned, ushort, v_uint32)
DEF_CVT_FUNC(32u16s, cvt_, unsigned, short, v_int32)
// scalar-only conversions (no SIMD kernel is used for these pairs)
DEF_CVT_SCALAR_FUNC(32u32s, unsigned, int)
DEF_CVT_FUNC(32u32f, cvt_, unsigned, float, v_float32)
DEF_CVT_FUNC(32u64f, cvt_, unsigned, double, v_float32)
DEF_CVT_SCALAR_FUNC(32u64s, unsigned, int64_t)
DEF_CVT_FUNC(32u16f, cvt1_, unsigned, float16_t, v_float32)
DEF_CVT_FUNC(32u16bf, cvt1_, unsigned, bfloat16_t, v_float32)
////////////////////// 32s -> ... ////////////////////////
@ -226,9 +397,14 @@ DEF_CVT_FUNC(32s8u, cvt_, int, uchar, v_int32)
// Converters from 32-bit signed sources (the 32s8u instantiation precedes this chunk).
DEF_CVT_FUNC ( 32 s8s , cvt_ , int , schar , v_int32 )
DEF_CVT_FUNC ( 32 s16u , cvt_ , int , ushort , v_int32 )
DEF_CVT_FUNC ( 32 s16s , cvt_ , int , short , v_int32 )
DEF_CVT_FUNC ( 32 s32u , cvt_ , int , unsigned , v_uint32 )
DEF_CVT_FUNC ( 32 s32f , cvt_ , int , float , v_float32 )
DEF_CVT_FUNC ( 32 s64f , cvt_ , int , double , v_int32 )
DEF_CVT_FUNC ( 32 s64u , cvt_ , int , uint64_t , v_uint32 )
DEF_CVT_FUNC ( 32 s64s , cvt_ , int , int64_t , v_int32 )
DEF_CVT_FUNC ( 32 s16f , cvt1_ , int , float16_t , v_float32 )
DEF_CVT_FUNC ( 32 s16bf , cvt1_ , int , bfloat16_t , v_float32 )
// integer source: shift 0, i.e. a plain "!= 0" test
DEF_CVT2BOOL_FUNC ( 32 s8b , int , 0 )
////////////////////// 32f -> ... ////////////////////////
@ -236,9 +412,14 @@ DEF_CVT_FUNC(32f8u, cvt_, float, uchar, v_float32)
// Converters from 32-bit float sources (the 32f8u instantiation precedes this chunk).
DEF_CVT_FUNC ( 32f 8 s , cvt_ , float , schar , v_float32 )
DEF_CVT_FUNC ( 32f 16u , cvt_ , float , ushort , v_float32 )
DEF_CVT_FUNC ( 32f 16 s , cvt_ , float , short , v_float32 )
DEF_CVT_FUNC ( 32f 32u , cvt_ , float , unsigned , v_float32 )
DEF_CVT_FUNC ( 32f 32 s , cvt_ , float , int , v_float32 )
DEF_CVT_FUNC ( 32f 64f , cvt_ , float , double , v_float32 )
// 64-bit integer destinations go through the v_float64-based cvt_64f kernel
DEF_CVT_FUNC ( 32f 64u , cvt_64f , float , uint64_t , v_float64 )
DEF_CVT_FUNC ( 32f 64 s , cvt_64f , float , int64_t , v_float64 )
DEF_CVT_FUNC ( 32f 16f , cvt1_ , float , float16_t , v_float32 )
DEF_CVT_FUNC ( 32f 16 bf , cvt1_ , float , bfloat16_t , v_float32 )
// float -> bool reads the raw 32-bit pattern as int; shift 1 drops the top
// (sign) bit before the non-zero test, presumably so that -0.0 maps to false
DEF_CVT2BOOL_FUNC ( 32f 8 b , int , 1 )
////////////////////// 64f -> ... ////////////////////////
@ -246,9 +427,14 @@ DEF_CVT_FUNC(64f8u, cvt_, double, uchar, v_int32)
// Converters from 64-bit float sources (the 64f8u instantiation precedes this chunk).
DEF_CVT_FUNC ( 64f 8 s , cvt_ , double , schar , v_int32 )
DEF_CVT_FUNC ( 64f 16u , cvt_ , double , ushort , v_int32 )
DEF_CVT_FUNC ( 64f 16 s , cvt_ , double , short , v_int32 )
// cvt_64f ignores its third template argument, so the vector type named on
// the cvt_64f lines below has no effect on the generated code
DEF_CVT_FUNC ( 64f 32u , cvt_64f , double , unsigned , v_float32 )
DEF_CVT_FUNC ( 64f 32 s , cvt_ , double , int , v_int32 )
DEF_CVT_FUNC ( 64f 32f , cvt_ , double , float , v_float32 )
DEF_CVT_FUNC ( 64f 64u , cvt_64f , double , uint64_t , v_float64 )
DEF_CVT_FUNC ( 64f 64 s , cvt_64f , double , int64_t , v_float32 )
DEF_CVT_FUNC ( 64f 16f , cvt1_ , double , float16_t , v_float32 )
DEF_CVT_FUNC ( 64f 16 bf , cvt1_ , double , bfloat16_t , v_float32 )
// double -> bool reads the raw 64-bit pattern as int64_t; shift 1 drops the
// top (sign) bit before the non-zero test
DEF_CVT2BOOL_FUNC ( 64f 8 b , int64_t , 1 )
////////////////////// 16f -> ... ////////////////////////
@ -256,9 +442,56 @@ DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
// Converters from float16_t sources (the 16f8u instantiation precedes this chunk),
// followed by the bfloat16_t, 64-bit signed and 64-bit unsigned sources.
DEF_CVT_FUNC ( 16f 8 s , cvt_ , float16_t , schar , v_float32 )
DEF_CVT_FUNC ( 16f 16u , cvt1_ , float16_t , ushort , v_float32 )
DEF_CVT_FUNC ( 16f 16 s , cvt1_ , float16_t , short , v_float32 )
DEF_CVT_FUNC ( 16f 32u , cvt1_ , float16_t , unsigned , v_float32 )
DEF_CVT_FUNC ( 16f 32 s , cvt1_ , float16_t , int , v_float32 )
DEF_CVT_FUNC ( 16f 32f , cvt1_ , float16_t , float , v_float32 )
DEF_CVT_FUNC ( 16f 64f , cvt1_ , float16_t , double , v_float32 )
DEF_CVT_FUNC ( 16f 64u , cvt1_ , float16_t , uint64_t , v_float32 )
DEF_CVT_FUNC ( 16f 64 s , cvt1_ , float16_t , int64_t , v_float32 )
DEF_CVT_FUNC ( 16f 16 bf , cvt1_ , float16_t , bfloat16_t , v_float32 )
// half -> bool reads the raw 16-bit pattern as short, with shift 1 as for the
// other floating-point sources
DEF_CVT2BOOL_FUNC ( 16f 8 b , short , 1 )
////////////////////// 16bf -> ... ////////////////////////
// (no 16bf -> bool instantiation is defined in this section)
DEF_CVT_FUNC ( 16 bf8u , cvt_ , bfloat16_t , uchar , v_float32 )
DEF_CVT_FUNC ( 16 bf8s , cvt_ , bfloat16_t , schar , v_float32 )
DEF_CVT_FUNC ( 16 bf16u , cvt1_ , bfloat16_t , ushort , v_float32 )
DEF_CVT_FUNC ( 16 bf16s , cvt1_ , bfloat16_t , short , v_float32 )
DEF_CVT_FUNC ( 16 bf32u , cvt1_ , bfloat16_t , unsigned , v_float32 )
DEF_CVT_FUNC ( 16 bf32s , cvt1_ , bfloat16_t , int , v_float32 )
DEF_CVT_FUNC ( 16 bf32f , cvt1_ , bfloat16_t , float , v_float32 )
DEF_CVT_FUNC ( 16 bf64f , cvt1_ , bfloat16_t , double , v_float32 )
DEF_CVT_FUNC ( 16 bf64u , cvt1_ , bfloat16_t , uint64_t , v_float32 )
DEF_CVT_FUNC ( 16 bf64s , cvt1_ , bfloat16_t , int64_t , v_float32 )
DEF_CVT_FUNC ( 16 bf16f , cvt1_ , bfloat16_t , float16_t , v_float32 )
////////////////////// 64s -> ... ////////////////////////
DEF_CVT_FUNC ( 64 s8u , cvt_ , int64_t , uchar , v_int32 )
DEF_CVT_FUNC ( 64 s8s , cvt_ , int64_t , schar , v_int32 )
DEF_CVT_FUNC ( 64 s16u , cvt_ , int64_t , ushort , v_int32 )
DEF_CVT_FUNC ( 64 s16s , cvt_ , int64_t , short , v_int32 )
DEF_CVT_FUNC ( 64 s32u , cvt_ , int64_t , unsigned , v_uint32 )
DEF_CVT_FUNC ( 64 s32s , cvt_ , int64_t , int , v_int32 )
DEF_CVT_FUNC ( 64 s32f , cvt_64f , int64_t , float , v_float32 )
DEF_CVT_FUNC ( 64 s64f , cvt_64f , int64_t , double , v_float64 )
DEF_CVT_FUNC ( 64 s64u , cvt_ , int64_t , uint64_t , v_uint64 )
DEF_CVT_FUNC ( 64 s16f , cvt1_ , int64_t , float16_t , v_float32 )
DEF_CVT_FUNC ( 64 s16bf , cvt1_ , int64_t , bfloat16_t , v_float32 )
// integer source: shift 0, i.e. a plain "!= 0" test
DEF_CVT2BOOL_FUNC ( 64 s8b , int64_t , 0 )
////////////////////// 64u -> ... ////////////////////////
// (no 64u -> 64s and no 64u -> bool instantiation is defined in this section)
DEF_CVT_FUNC ( 64u 8u , cvt_ , uint64_t , uchar , v_int32 )
DEF_CVT_FUNC ( 64u 8 s , cvt_ , uint64_t , schar , v_int32 )
DEF_CVT_FUNC ( 64u 16u , cvt_ , uint64_t , ushort , v_int32 )
DEF_CVT_FUNC ( 64u 16 s , cvt_ , uint64_t , short , v_int32 )
DEF_CVT_FUNC ( 64u 32u , cvt_ , uint64_t , unsigned , v_uint32 )
DEF_CVT_FUNC ( 64u 32 s , cvt_ , uint64_t , int , v_int32 )
DEF_CVT_FUNC ( 64u 32f , cvt_64f , uint64_t , float , v_float64 )
DEF_CVT_FUNC ( 64u 64f , cvt_64f , uint64_t , double , v_float64 )
DEF_CVT_FUNC ( 64u 16f , cvt1_ , uint64_t , float16_t , v_float32 )
DEF_CVT_FUNC ( 64u 16 bf , cvt1_ , uint64_t , bfloat16_t , v_float32 )
///////////// "conversion" w/o conversion ///////////////
@ -274,147 +507,210 @@ static void cvt32s(const uchar* src, size_t sstep, const uchar*, size_t, uchar*
static void cvt64s ( const uchar * src , size_t sstep , const uchar * , size_t , uchar * dst , size_t dstep , Size size , void * )
{ CV_INSTRUMENT_REGION ( ) ; cvtCopy ( ( const uchar * ) src , sstep , ( uchar * ) dst , dstep , size , 8 ) ; }
/* [TODO] Recover IPP calls
# if defined(HAVE_IPP)
# define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt # # suffix ( const stype * src , size_t sstep , const uchar * , size_t , \
dtype * dst , size_t dstep , Size size , double * ) \
{ \
CV_IPP_RUN ( src & & dst , CV_INSTRUMENT_FUN_IPP ( ippiConvert_ # # ippFavor , src , ( int ) sstep , dst , ( int ) dstep , ippiSize ( size . width , size . height ) ) > = 0 ) \
cvt_ ( src , sstep , dst , dstep , size ) ; \
}
# define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt # # suffix ( const stype * src , size_t sstep , const uchar * , size_t , \
dtype * dst , size_t dstep , Size size , double * ) \
{ \
CV_IPP_RUN ( src & & dst , CV_INSTRUMENT_FUN_IPP ( ippiConvert_ # # ippFavor , src , ( int ) sstep , dst , ( int ) dstep , ippiSize ( size . width , size . height ) , ippRndFinancial , 0 ) > = 0 ) \
cvt_ ( src , sstep , dst , dstep , size ) ; \
}
# else
# define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt # # suffix ( const stype * src , size_t sstep , const uchar * , size_t , \
dtype * dst , size_t dstep , Size size , double * ) \
{ \
cvt_ ( src , sstep , dst , dstep , size ) ; \
}
# define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
# endif
# define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt # # suffix ( const stype * src , size_t sstep , const uchar * , size_t , \
dtype * dst , size_t dstep , Size size , double * ) \
{ \
cvt_ ( src , sstep , dst , dstep , size ) ; \
}
# define DEF_CPY_FUNC(suffix, stype) \
static void cvt # # suffix ( const stype * src , size_t sstep , const uchar * , size_t , \
stype * dst , size_t dstep , Size size , double * ) \
{ \
cpy_ ( src , sstep , dst , dstep , size ) ; \
}
DEF_CPY_FUNC ( 8u , uchar )
DEF_CVT_FUNC_F ( 8 s8u , schar , uchar , 8 s8u_C1Rs )
DEF_CVT_FUNC_F ( 16u 8u , ushort , uchar , 16u 8u _C1R )
DEF_CVT_FUNC_F ( 16 s8u , short , uchar , 16 s8u_C1R )
DEF_CVT_FUNC_F ( 32 s8u , int , uchar , 32 s8u_C1R )
DEF_CVT_FUNC_F2 ( 32f 8u , float , uchar , 32f 8u _C1RSfs )
DEF_CVT_FUNC ( 64f 8u , double , uchar )
DEF_CVT_FUNC_F2 ( 8u 8 s , uchar , schar , 8u 8 s_C1RSfs )
DEF_CVT_FUNC_F2 ( 16u 8 s , ushort , schar , 16u 8 s_C1RSfs )
DEF_CVT_FUNC_F2 ( 16 s8s , short , schar , 16 s8s_C1RSfs )
DEF_CVT_FUNC_F ( 32 s8s , int , schar , 32 s8s_C1R )
DEF_CVT_FUNC_F2 ( 32f 8 s , float , schar , 32f 8 s_C1RSfs )
DEF_CVT_FUNC ( 64f 8 s , double , schar )
DEF_CVT_FUNC_F ( 8u 16u , uchar , ushort , 8u 16u _C1R )
DEF_CVT_FUNC_F ( 8 s16u , schar , ushort , 8 s16u_C1Rs )
DEF_CPY_FUNC ( 16u , ushort )
DEF_CVT_FUNC_F ( 16 s16u , short , ushort , 16 s16u_C1Rs )
DEF_CVT_FUNC_F2 ( 32 s16u , int , ushort , 32 s16u_C1RSfs )
DEF_CVT_FUNC_F2 ( 32f 16u , float , ushort , 32f 16u _C1RSfs )
DEF_CVT_FUNC ( 64f 16u , double , ushort )
DEF_CVT_FUNC_F ( 8u 16 s , uchar , short , 8u 16 s_C1R )
DEF_CVT_FUNC_F ( 8 s16s , schar , short , 8 s16s_C1R )
DEF_CVT_FUNC_F2 ( 16u 16 s , ushort , short , 16u 16 s_C1RSfs )
DEF_CVT_FUNC_F2 ( 32 s16s , int , short , 32 s16s_C1RSfs )
DEF_CVT_FUNC ( 32f 16 s , float , short )
DEF_CVT_FUNC ( 64f 16 s , double , short )
DEF_CVT_FUNC_F ( 8u 32 s , uchar , int , 8u 32 s_C1R )
DEF_CVT_FUNC_F ( 8 s32s , schar , int , 8 s32s_C1R )
DEF_CVT_FUNC_F ( 16u 32 s , ushort , int , 16u 32 s_C1R )
DEF_CVT_FUNC_F ( 16 s32s , short , int , 16 s32s_C1R )
DEF_CPY_FUNC ( 32 s , int )
DEF_CVT_FUNC_F2 ( 32f 32 s , float , int , 32f 32 s_C1RSfs )
DEF_CVT_FUNC ( 64f 32 s , double , int )
DEF_CVT_FUNC_F ( 8u 32f , uchar , float , 8u 32f _C1R )
DEF_CVT_FUNC_F ( 8 s32f , schar , float , 8 s32f_C1R )
DEF_CVT_FUNC_F ( 16u 32f , ushort , float , 16u 32f _C1R )
DEF_CVT_FUNC_F ( 16 s32f , short , float , 16 s32f_C1R )
DEF_CVT_FUNC_F ( 32 s32f , int , float , 32 s32f_C1R )
DEF_CVT_FUNC ( 64f 32f , double , float )
DEF_CVT_FUNC ( 8u 64f , uchar , double )
DEF_CVT_FUNC ( 8 s64f , schar , double )
DEF_CVT_FUNC ( 16u 64f , ushort , double )
DEF_CVT_FUNC ( 16 s64f , short , double )
DEF_CVT_FUNC ( 32 s64f , int , double )
DEF_CVT_FUNC ( 32f 64f , float , double )
DEF_CPY_FUNC ( 64 s , int64 )
*/
// Returns the kernel that converts elements of depth `sdepth_` into elements
// of depth `ddepth_` (both are reduced with CV_MAT_DEPTH first).
//
// Selection rules visible in the table below:
//  - same-depth pairs, and pairs dispatched as raw copies, use the copy
//    kernels cvt8u / cvt16u / cvt32s / cvt64s (1, 2, 4 and 8 byte elements);
//  - where the table marks a kernel as "same as ...", the signed-destination
//    kernel is reused for the unsigned destination of the same width;
//  - bool sources reuse the kernel of the same-width integer destination;
//  - bool destinations reuse the kernel of the same-width signed/float source.
// CV_Assert fires when no kernel exists for the requested pair.
//
// NOTE(review): 64U -> 64S is dispatched to the raw 8-byte copy (cvt64s), so
// values above the int64 range are not saturated, unlike 64S -> 64U which has
// a dedicated kernel - confirm that this is intended.
BinaryFunc getConvertFunc(int sdepth_, int ddepth_)
{
    int sdepth = CV_MAT_DEPTH(sdepth_);
    int ddepth = CV_MAT_DEPTH(ddepth_);
    BinaryFunc func =
        ddepth == CV_8U ? (
            sdepth == CV_8U ? cvt8u :
            sdepth == CV_8S ? cvt8s8u :
            sdepth == CV_16U ? cvt16u8u :
            sdepth == CV_16S ? cvt16s8u :
            sdepth == CV_32U ? cvt32u8u :
            sdepth == CV_32S ? cvt32s8u :
            sdepth == CV_32F ? cvt32f8u :
            sdepth == CV_64F ? cvt64f8u :
            sdepth == CV_16F ? cvt16f8u :
            sdepth == CV_16BF ? cvt16bf8u :
            sdepth == CV_Bool ? cvt8b8u :
            sdepth == CV_64U ? cvt64u8u :
            sdepth == CV_64S ? cvt64s8u :
            0) :
        ddepth == CV_8S ? (
            sdepth == CV_8U ? cvt8u8s :
            sdepth == CV_8S ? cvt8u :
            sdepth == CV_16U ? cvt16u8s :
            sdepth == CV_16S ? cvt16s8s :
            sdepth == CV_32U ? cvt32u8s :
            sdepth == CV_32S ? cvt32s8s :
            sdepth == CV_32F ? cvt32f8s :
            sdepth == CV_64F ? cvt64f8s :
            sdepth == CV_16F ? cvt16f8s :
            sdepth == CV_16BF ? cvt16bf8s :
            sdepth == CV_Bool ? cvt8b8u :
            sdepth == CV_64U ? cvt64u8s :
            sdepth == CV_64S ? cvt64s8s :
            0) :
        ddepth == CV_16U ? (
            sdepth == CV_8U ? cvt8u16s : // same as cvt8u16u
            sdepth == CV_8S ? cvt8s16u :
            sdepth == CV_16U ? cvt16u :
            sdepth == CV_16S ? cvt16s16u :
            sdepth == CV_32U ? cvt32u16u :
            sdepth == CV_32S ? cvt32s16u :
            sdepth == CV_32F ? cvt32f16u :
            sdepth == CV_64F ? cvt64f16u :
            sdepth == CV_16F ? cvt16f16u :
            sdepth == CV_16BF ? cvt16bf16u :
            sdepth == CV_Bool ? cvt8b16s :
            sdepth == CV_64U ? cvt64u16u :
            sdepth == CV_64S ? cvt64s16u :
            0) :
        ddepth == CV_16S ? (
            sdepth == CV_8U ? cvt8u16s :
            sdepth == CV_8S ? cvt8s16s :
            sdepth == CV_16U ? cvt16u16s :
            sdepth == CV_16S ? cvt16u :
            sdepth == CV_32U ? cvt32u16s :
            sdepth == CV_32S ? cvt32s16s :
            sdepth == CV_32F ? cvt32f16s :
            sdepth == CV_64F ? cvt64f16s :
            sdepth == CV_16F ? cvt16f16s :
            sdepth == CV_16BF ? cvt16bf16s :
            sdepth == CV_Bool ? cvt8b16s :
            sdepth == CV_64U ? cvt64u16s :
            sdepth == CV_64S ? cvt64s16s :
            0) :
        ddepth == CV_32U ? (
            sdepth == CV_8U ? cvt8u32s : // same as cvt8u32u
            sdepth == CV_8S ? cvt8s32u :
            sdepth == CV_16U ? cvt16u32s : // same as cvt16u32u
            sdepth == CV_16S ? cvt16s32u :
            sdepth == CV_32U ? cvt32s :
            sdepth == CV_32S ? cvt32s32u :
            sdepth == CV_32F ? cvt32f32u :
            sdepth == CV_64F ? cvt64f32u :
            sdepth == CV_16F ? cvt16f32u :
            sdepth == CV_16BF ? cvt16bf32u :
            sdepth == CV_Bool ? cvt8b32s :
            sdepth == CV_64U ? cvt64u32u :
            sdepth == CV_64S ? cvt64s32u :
            0) :
        ddepth == CV_32S ? (
            sdepth == CV_8U ? cvt8u32s :
            sdepth == CV_8S ? cvt8s32s :
            sdepth == CV_16U ? cvt16u32s :
            sdepth == CV_16S ? cvt16s32s :
            sdepth == CV_32U ? cvt32u32s :
            sdepth == CV_32S ? cvt32s :
            sdepth == CV_32F ? cvt32f32s :
            sdepth == CV_64F ? cvt64f32s :
            sdepth == CV_16F ? cvt16f32s :
            sdepth == CV_16BF ? cvt16bf32s :
            sdepth == CV_Bool ? cvt8b32s :
            sdepth == CV_64U ? cvt64u32s :
            sdepth == CV_64S ? cvt64s32s :
            0) :
        ddepth == CV_32F ? (
            sdepth == CV_8U ? cvt8u32f :
            sdepth == CV_8S ? cvt8s32f :
            sdepth == CV_16U ? cvt16u32f :
            sdepth == CV_16S ? cvt16s32f :
            sdepth == CV_32U ? cvt32u32f :
            sdepth == CV_32S ? cvt32s32f :
            sdepth == CV_32F ? cvt32s :
            sdepth == CV_64F ? cvt64f32f :
            sdepth == CV_16F ? cvt16f32f :
            sdepth == CV_16BF ? cvt16bf32f :
            sdepth == CV_Bool ? cvt8b32f :
            sdepth == CV_64U ? cvt64u32f :
            sdepth == CV_64S ? cvt64s32f :
            0) :
        ddepth == CV_64F ? (
            sdepth == CV_8U ? cvt8u64f :
            sdepth == CV_8S ? cvt8s64f :
            sdepth == CV_16U ? cvt16u64f :
            sdepth == CV_16S ? cvt16s64f :
            sdepth == CV_32U ? cvt32u64f :
            sdepth == CV_32S ? cvt32s64f :
            sdepth == CV_32F ? cvt32f64f :
            sdepth == CV_64F ? cvt64s :
            sdepth == CV_16F ? cvt16f64f :
            sdepth == CV_16BF ? cvt16bf64f :
            sdepth == CV_Bool ? cvt8b64f :
            sdepth == CV_64U ? cvt64u64f :
            sdepth == CV_64S ? cvt64s64f :
            0) :
        ddepth == CV_16F ? (
            sdepth == CV_8U ? cvt8u16f :
            sdepth == CV_8S ? cvt8s16f :
            sdepth == CV_16U ? cvt16u16f :
            sdepth == CV_16S ? cvt16s16f :
            sdepth == CV_32U ? cvt32u16f :
            sdepth == CV_32S ? cvt32s16f :
            sdepth == CV_32F ? cvt32f16f :
            sdepth == CV_64F ? cvt64f16f :
            sdepth == CV_16F ? cvt16u :
            sdepth == CV_16BF ? cvt16bf16f :
            sdepth == CV_Bool ? cvt8b16f :
            sdepth == CV_64U ? cvt64u16f :
            sdepth == CV_64S ? cvt64s16f :
            0) :
        ddepth == CV_16BF ? (
            sdepth == CV_8U ? cvt8u16bf :
            sdepth == CV_8S ? cvt8s16bf :
            sdepth == CV_16U ? cvt16u16bf :
            sdepth == CV_16S ? cvt16s16bf :
            sdepth == CV_32U ? cvt32u16bf :
            sdepth == CV_32S ? cvt32s16bf :
            sdepth == CV_32F ? cvt32f16bf :
            sdepth == CV_64F ? cvt64f16bf :
            sdepth == CV_16F ? cvt16f16bf :
            sdepth == CV_16BF ? cvt16u :
            sdepth == CV_Bool ? cvt8b16bf :
            sdepth == CV_64U ? cvt64u16bf :
            sdepth == CV_64S ? cvt64s16bf :
            0) :
        ddepth == CV_Bool ? (
            sdepth == CV_8U ? cvt8u8b :
            sdepth == CV_8S ? cvt8u8b :
            sdepth == CV_16U ? cvt16s8b :
            sdepth == CV_16S ? cvt16s8b :
            sdepth == CV_32U ? cvt32s8b :
            sdepth == CV_32S ? cvt32s8b :
            sdepth == CV_32F ? cvt32f8b :
            sdepth == CV_64F ? cvt64f8b :
            sdepth == CV_16F ? cvt16f8b :
            sdepth == CV_16BF ? cvt16f8b : // the 16f kernel is reused for 16bf
            sdepth == CV_Bool ? cvt8u :
            sdepth == CV_64U ? cvt64s8b :
            sdepth == CV_64S ? cvt64s8b :
            0) :
        ddepth == CV_64U ? (
            sdepth == CV_8U ? cvt8u64s : // same as cvt8u64u
            sdepth == CV_8S ? cvt8s64u :
            sdepth == CV_16U ? cvt16u64s : // same as cvt16u64u
            sdepth == CV_16S ? cvt16s64u :
            sdepth == CV_32U ? cvt32u64s : // same as cvt32u64u
            sdepth == CV_32S ? cvt32s64u :
            sdepth == CV_32F ? cvt32f64u :
            sdepth == CV_64F ? cvt64f64u :
            sdepth == CV_16F ? cvt16f64u :
            sdepth == CV_16BF ? cvt16bf64u :
            sdepth == CV_Bool ? cvt8b64s :
            sdepth == CV_64U ? cvt64s :
            sdepth == CV_64S ? cvt64s64u :
            0) :
        ddepth == CV_64S ? (
            sdepth == CV_8U ? cvt8u64s :
            sdepth == CV_8S ? cvt8s64s :
            sdepth == CV_16U ? cvt16u64s :
            sdepth == CV_16S ? cvt16s64s :
            sdepth == CV_32U ? cvt32u64s :
            sdepth == CV_32S ? cvt32s64s :
            sdepth == CV_32F ? cvt32f64s :
            sdepth == CV_64F ? cvt64f64s :
            sdepth == CV_16F ? cvt16f64s :
            sdepth == CV_16BF ? cvt16bf64s :
            sdepth == CV_Bool ? cvt8b64s :
            sdepth == CV_64U ? cvt64s :
            sdepth == CV_64S ? cvt64s :
            0) :
        0;
    CV_Assert(func != 0);
    return func;
}
CV_CPU_OPTIMIZATION_NAMESPACE_END