@ -18,8 +18,10 @@
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
*/
# include "libavutil/x86/cpu.h"
# include "dsputil_x86.h"
# include "diracdsp_mmx.h"
# include "fpel.h"
void ff_put_rect_clamped_mmx ( uint8_t * dst , int dst_stride , const int16_t * src , int src_stride , int width , int height ) ;
void ff_put_rect_clamped_sse2 ( uint8_t * dst , int dst_stride , const int16_t * src , int src_stride , int width , int height ) ;
@ -56,14 +58,76 @@ HPEL_FILTER(16, sse2)
c - > PFX # # _dirac_pixels_tab [ 1 ] [ IDX ] = ff_ # # PFX # # _dirac_pixels16_ # # EXT ; \
c - > PFX # # _dirac_pixels_tab [ 2 ] [ IDX ] = ff_ # # PFX # # _dirac_pixels32_ # # EXT
# define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
void ff_ # # OPNAME2 # # _dirac_pixels8_ # # EXT ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h ) \
{ \
if ( h & 3 ) \
ff_ # # OPNAME2 # # _dirac_pixels8_c ( dst , src , stride , h ) ; \
else \
OPNAME # # _pixels8_ # # EXT ( dst , src [ 0 ] , stride , h ) ; \
} \
void ff_ # # OPNAME2 # # _dirac_pixels16_ # # EXT ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h ) \
{ \
if ( h & 3 ) \
ff_ # # OPNAME2 # # _dirac_pixels16_c ( dst , src , stride , h ) ; \
else \
OPNAME # # _pixels16_ # # EXT ( dst , src [ 0 ] , stride , h ) ; \
} \
void ff_ # # OPNAME2 # # _dirac_pixels32_ # # EXT ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h ) \
{ \
if ( h & 3 ) { \
ff_ # # OPNAME2 # # _dirac_pixels32_c ( dst , src , stride , h ) ; \
} else { \
OPNAME # # _pixels16_ # # EXT ( dst , src [ 0 ] , stride , h ) ; \
OPNAME # # _pixels16_ # # EXT ( dst + 16 , src [ 0 ] + 16 , stride , h ) ; \
} \
}
void ff_avg_pixels16_mmxext ( uint8_t * block , const uint8_t * pixels ,
ptrdiff_t line_size , int h ) ;
DIRAC_PIXOP ( put , ff_put , mmx )
DIRAC_PIXOP ( avg , ff_avg , mmx )
DIRAC_PIXOP ( avg , ff_avg , mmxext )
void ff_put_dirac_pixels16_sse2 ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h )
{
if ( h & 3 )
ff_put_dirac_pixels16_c ( dst , src , stride , h ) ;
else
ff_put_pixels16_sse2 ( dst , src [ 0 ] , stride , h ) ;
}
void ff_avg_dirac_pixels16_sse2 ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h )
{
if ( h & 3 )
ff_avg_dirac_pixels16_c ( dst , src , stride , h ) ;
else
ff_avg_pixels16_sse2 ( dst , src [ 0 ] , stride , h ) ;
}
void ff_put_dirac_pixels32_sse2 ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h )
{
if ( h & 3 ) {
ff_put_dirac_pixels32_c ( dst , src , stride , h ) ;
} else {
ff_put_pixels16_sse2 ( dst , src [ 0 ] , stride , h ) ;
ff_put_pixels16_sse2 ( dst + 16 , src [ 0 ] + 16 , stride , h ) ;
}
}
void ff_avg_dirac_pixels32_sse2 ( uint8_t * dst , const uint8_t * src [ 5 ] , int stride , int h )
{
if ( h & 3 ) {
ff_avg_dirac_pixels32_c ( dst , src , stride , h ) ;
} else {
ff_avg_pixels16_sse2 ( dst , src [ 0 ] , stride , h ) ;
ff_avg_pixels16_sse2 ( dst + 16 , src [ 0 ] + 16 , stride , h ) ;
}
}
void ff_diracdsp_init_mmx ( DiracDSPContext * c )
{
int mm_flags = av_get_cpu_flags ( ) ;
if ( ! ( mm_flags & AV_CPU_FLAG_MMX ) )
return ;
# if HAVE_YASM
if ( EXTERNAL_MMX ( mm_flags ) ) {
c - > add_dirac_obmc [ 0 ] = ff_add_dirac_obmc8_mmx ;
# if !ARCH_X86_64
c - > add_dirac_obmc [ 1 ] = ff_add_dirac_obmc16_mmx ;
@ -72,33 +136,25 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
c - > add_rect_clamped = ff_add_rect_clamped_mmx ;
c - > put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx ;
# endif
# endif
# if HAVE_MMX_INLINE
PIXFUNC ( put , 0 , mmx ) ;
PIXFUNC ( avg , 0 , mmx ) ;
# endif
}
# if HAVE_MMXEXT_INLINE
if ( mm_flags & AV_CPU_FLAG_MMX2 ) {
if ( EXTERNAL_MMXEXT ( mm_flags ) ) {
PIXFUNC ( avg , 0 , mmxext ) ;
}
# endif
if ( mm_flags & AV_CPU_FLAG_SSE2 ) {
# if HAVE_YASM
if ( EXTERNAL_SSE2 ( mm_flags ) ) {
c - > dirac_hpel_filter = dirac_hpel_filter_sse2 ;
c - > add_rect_clamped = ff_add_rect_clamped_sse2 ;
c - > put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2 ;
c - > add_dirac_obmc [ 1 ] = ff_add_dirac_obmc16_sse2 ;
c - > add_dirac_obmc [ 2 ] = ff_add_dirac_obmc32_sse2 ;
# endif
# if HAVE_SSE2_INLINE
c - > put_dirac_pixels_tab [ 1 ] [ 0 ] = ff_put_dirac_pixels16_sse2 ;
c - > avg_dirac_pixels_tab [ 1 ] [ 0 ] = ff_avg_dirac_pixels16_sse2 ;
c - > put_dirac_pixels_tab [ 2 ] [ 0 ] = ff_put_dirac_pixels32_sse2 ;
c - > avg_dirac_pixels_tab [ 2 ] [ 0 ] = ff_avg_dirac_pixels32_sse2 ;
# endif
}
}