@ -31,7 +31,8 @@
# include "yuv2rgb_altivec.h"
# include "libavutil/ppc/util_altivec.h"
# if HAVE_ALTIVEC && HAVE_BIGENDIAN
# if HAVE_ALTIVEC
# if HAVE_BIGENDIAN
# define vzero vec_splat_s32(0)
# define GET_LS(a,b,c,s) {\
@ -102,7 +103,137 @@
# include "swscale_ppc_template.c"
# undef FUNC
# endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
# undef vzero
# endif /* HAVE_BIGENDIAN */
# define output_pixel(pos, val, bias, signedness) \
if ( big_endian ) { \
AV_WB16 ( pos , bias + av_clip_ # # signedness # # 16 ( val > > shift ) ) ; \
} else { \
AV_WL16 ( pos , bias + av_clip_ # # signedness # # 16 ( val > > shift ) ) ; \
}
static void
yuv2plane1_float_u ( const int32_t * src , float * dest , int dstW , int start )
{
static const int big_endian = HAVE_BIGENDIAN ;
static const int shift = 3 ;
static const float float_mult = 1.0f / 65535.0f ;
int i , val ;
uint16_t val_uint ;
for ( i = start ; i < dstW ; + + i ) {
val = src [ i ] + ( 1 < < ( shift - 1 ) ) ;
output_pixel ( & val_uint , val , 0 , uint ) ;
dest [ i ] = float_mult * ( float ) val_uint ;
}
}
static void
yuv2plane1_float_bswap_u ( const int32_t * src , uint32_t * dest , int dstW , int start )
{
static const int big_endian = HAVE_BIGENDIAN ;
static const int shift = 3 ;
static const float float_mult = 1.0f / 65535.0f ;
int i , val ;
uint16_t val_uint ;
for ( i = start ; i < dstW ; + + i ) {
val = src [ i ] + ( 1 < < ( shift - 1 ) ) ;
output_pixel ( & val_uint , val , 0 , uint ) ;
dest [ i ] = av_bswap32 ( av_float2int ( float_mult * ( float ) val_uint ) ) ;
}
}
static void yuv2plane1_float_altivec ( const int32_t * src , float * dest , int dstW )
{
const int dst_u = - ( uintptr_t ) dest & 3 ;
const int shift = 3 ;
const int add = ( 1 < < ( shift - 1 ) ) ;
const int clip = ( 1 < < 16 ) - 1 ;
const float fmult = 1.0f / 65535.0f ;
const vector uint32_t vadd = ( vector uint32_t ) { add , add , add , add } ;
const vector uint32_t vshift = ( vector uint32_t ) vec_splat_u32 ( shift ) ;
const vector uint32_t vlargest = ( vector uint32_t ) { clip , clip , clip , clip } ;
const vector float vmul = ( vector float ) { fmult , fmult , fmult , fmult } ;
const vector float vzero = ( vector float ) { 0 , 0 , 0 , 0 } ;
vector uint32_t v ;
vector float vd ;
int i ;
yuv2plane1_float_u ( src , dest , dst_u , 0 ) ;
for ( i = dst_u ; i < dstW - 3 ; i + = 4 ) {
v = vec_ld ( 0 , ( const uint32_t * ) & src [ i ] ) ;
v = vec_add ( v , vadd ) ;
v = vec_sr ( v , vshift ) ;
v = vec_min ( v , vlargest ) ;
vd = vec_ctf ( v , 0 ) ;
vd = vec_madd ( vd , vmul , vzero ) ;
vec_st ( vd , 0 , & dest [ i ] ) ;
}
yuv2plane1_float_u ( src , dest , dstW , i ) ;
}
static void yuv2plane1_float_bswap_altivec ( const int32_t * src , uint32_t * dest , int dstW )
{
const int dst_u = - ( uintptr_t ) dest & 3 ;
const int shift = 3 ;
const int add = ( 1 < < ( shift - 1 ) ) ;
const int clip = ( 1 < < 16 ) - 1 ;
const float fmult = 1.0f / 65535.0f ;
const vector uint32_t vadd = ( vector uint32_t ) { add , add , add , add } ;
const vector uint32_t vshift = ( vector uint32_t ) vec_splat_u32 ( shift ) ;
const vector uint32_t vlargest = ( vector uint32_t ) { clip , clip , clip , clip } ;
const vector float vmul = ( vector float ) { fmult , fmult , fmult , fmult } ;
const vector float vzero = ( vector float ) { 0 , 0 , 0 , 0 } ;
const vector uint32_t vswapbig = ( vector uint32_t ) { 16 , 16 , 16 , 16 } ;
const vector uint16_t vswapsmall = vec_splat_u16 ( 8 ) ;
vector uint32_t v ;
vector float vd ;
int i ;
yuv2plane1_float_bswap_u ( src , dest , dst_u , 0 ) ;
for ( i = dst_u ; i < dstW - 3 ; i + = 4 ) {
v = vec_ld ( 0 , ( const uint32_t * ) & src [ i ] ) ;
v = vec_add ( v , vadd ) ;
v = vec_sr ( v , vshift ) ;
v = vec_min ( v , vlargest ) ;
vd = vec_ctf ( v , 0 ) ;
vd = vec_madd ( vd , vmul , vzero ) ;
vd = ( vector float ) vec_rl ( ( vector uint32_t ) vd , vswapbig ) ;
vd = ( vector float ) vec_rl ( ( vector uint16_t ) vd , vswapsmall ) ;
vec_st ( vd , 0 , ( float * ) & dest [ i ] ) ;
}
yuv2plane1_float_bswap_u ( src , dest , dstW , i ) ;
}
# define yuv2plane1_float(template, dest_type, BE_LE) \
static void yuv2plane1_float # # BE_LE # # _altivec ( const int16_t * src , uint8_t * dest , \
int dstW , \
const uint8_t * dither , int offset ) \
{ \
template ( ( const int32_t * ) src , ( dest_type * ) dest , dstW ) ; \
}
# if HAVE_BIGENDIAN
yuv2plane1_float ( yuv2plane1_float_altivec , float , BE )
yuv2plane1_float ( yuv2plane1_float_bswap_altivec , uint32_t , LE )
# else
yuv2plane1_float ( yuv2plane1_float_altivec , float , LE )
yuv2plane1_float ( yuv2plane1_float_bswap_altivec , uint32_t , BE )
# endif
# endif /* HAVE_ALTIVEC */
av_cold void ff_sws_init_swscale_ppc ( SwsContext * c )
{
@ -124,6 +255,12 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
}
# endif
if ( dstFormat = = AV_PIX_FMT_GRAYF32BE ) {
c - > yuv2plane1 = yuv2plane1_floatBE_altivec ;
} else if ( dstFormat = = AV_PIX_FMT_GRAYF32LE ) {
c - > yuv2plane1 = yuv2plane1_floatLE_altivec ;
}
/* The following list of supported dstFormat values should
* match what ' s found in the body of ff_yuv2packedX_altivec ( ) */
if ( ! ( c - > flags & ( SWS_BITEXACT | SWS_FULL_CHR_H_INT ) ) & & ! c - > needAlpha ) {