@ -726,35 +726,6 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
}
}
/**
 * Transpose a 4x4 block of bytes using MMX.
 *
 * Four 32-bit rows are read from src (at src + k * src_stride, k = 0..3) and
 * four 32-bit rows are written to dst (at dst + k * dst_stride), where output
 * row k holds byte k of each of the four input rows, in input-row order.
 *
 * @param dst        destination of the transposed block
 * @param src        source block
 * @param dst_stride byte distance between consecutive destination rows
 * @param src_stride byte distance between consecutive source rows
 *
 * NOTE(review): the asm statement declares no clobbers for mm0-mm3 and does
 * not issue emms; callers presumably handle MMX state themselves - confirm.
 */
static inline void transpose4x4 ( uint8_t * dst , uint8_t * src , int dst_stride , int src_stride ) {
__asm__ volatile ( //FIXME could save 1 instruction if done as 8x4 ...
/* Load source rows 0..3 (operands %4..%7) into the low dwords of mm0..mm3. */
" movd %4, %%mm0 \n \t "
" movd %5, %%mm1 \n \t "
" movd %6, %%mm2 \n \t "
" movd %7, %%mm3 \n \t "
/* Interleave bytes: mm0 = rows 0/1 pairwise, mm2 = rows 2/3 pairwise. */
" punpcklbw %%mm1, %%mm0 \n \t "
" punpcklbw %%mm3, %%mm2 \n \t "
/* Keep a copy of the row 0/1 pairs so both word halves can be combined. */
" movq %%mm0, %%mm1 \n \t "
/* Interleave words: mm0 = source columns 0 and 1, mm1 = columns 2 and 3. */
" punpcklwd %%mm2, %%mm0 \n \t "
" punpckhwd %%mm2, %%mm1 \n \t "
/* Store column 0, then bring the high dword (column 1) down and store it. */
" movd %%mm0, %0 \n \t "
" punpckhdq %%mm0, %%mm0 \n \t "
" movd %%mm0, %1 \n \t "
/* Same for columns 2 and 3 held in mm1. */
" movd %%mm1, %2 \n \t "
" punpckhdq %%mm1, %%mm1 \n \t "
" movd %%mm1, %3 \n \t "
/* Outputs %0..%3: the four 32-bit destination rows. */
: " =m " ( * ( uint32_t * ) ( dst + 0 * dst_stride ) ) ,
" =m " ( * ( uint32_t * ) ( dst + 1 * dst_stride ) ) ,
" =m " ( * ( uint32_t * ) ( dst + 2 * dst_stride ) ) ,
" =m " ( * ( uint32_t * ) ( dst + 3 * dst_stride ) )
/* Inputs %4..%7: the four 32-bit source rows. */
: " m " ( * ( uint32_t * ) ( src + 0 * src_stride ) ) ,
" m " ( * ( uint32_t * ) ( src + 1 * src_stride ) ) ,
" m " ( * ( uint32_t * ) ( src + 2 * src_stride ) ) ,
" m " ( * ( uint32_t * ) ( src + 3 * src_stride ) )
) ;
}
static void h263_h_loop_filter_mmx ( uint8_t * src , int stride , int qscale ) {
if ( CONFIG_H263_DECODER | | CONFIG_H263_ENCODER ) {
const int strength = ff_h263_loop_filter_strength [ qscale ] ;
@ -1818,7 +1789,7 @@ PREFETCH(prefetch_mmx2, prefetcht0)
PREFETCH ( prefetch_3dnow , prefetch )
# undef PREFETCH
# include "h264dsp _mmx.c"
# include "h264_qpel _mmx.c"
/* Declaration only: the definition is not visible in this part of the file.
 * NOTE(review): dst/src/stride/h look like a destination, a source, a row
 * pitch and a row count, and x/y like a sub-sample offset - confirm against
 * the implementation before relying on this. */
void ff_put_h264_chroma_mc8_mmx_rnd ( uint8_t * dst , uint8_t * src ,
int stride , int h , int x , int y ) ;
@ -2449,20 +2420,8 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, co
/* Prototypes for the add_hfyu_* prediction helpers. Only declarations appear
 * here; the implementations are not visible in this part of the file
 * (presumably external assembly - confirm). The name suffix appears to
 * indicate the instruction set each variant requires. */
void ff_add_hfyu_median_prediction_mmx2 ( uint8_t * dst , const uint8_t * top , const uint8_t * diff , int w , int * left , int * left_top ) ;
int ff_add_hfyu_left_prediction_ssse3 ( uint8_t * dst , const uint8_t * src , int w , int left ) ;
int ff_add_hfyu_left_prediction_sse4 ( uint8_t * dst , const uint8_t * src , int w , int left ) ;
/* Prototypes for the x264 luma deblocking routines: the non-intra variants
 * take an extra tc0 pointer, the intra variants take only alpha and beta. */
void ff_x264_deblock_v_luma_sse2 ( uint8_t * pix , int stride , int alpha , int beta , int8_t * tc0 ) ;
void ff_x264_deblock_h_luma_sse2 ( uint8_t * pix , int stride , int alpha , int beta , int8_t * tc0 ) ;
void ff_x264_deblock_h_luma_intra_mmxext ( uint8_t * pix , int stride , int alpha , int beta ) ;
void ff_x264_deblock_v_luma_intra_sse2 ( uint8_t * pix , int stride , int alpha , int beta ) ;
void ff_x264_deblock_h_luma_intra_sse2 ( uint8_t * pix , int stride , int alpha , int beta ) ;
# if HAVE_YASM && ARCH_X86_32
/* Half-width worker used by the wrapper below; defined outside this section. */
void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride,
                                          int alpha, int beta);

/**
 * Apply the v8 intra luma deblocking routine twice, first at pix and then
 * at pix + 8, forwarding stride, alpha and beta unchanged to each call.
 */
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride,
                                                int alpha, int beta)
{
    int offset;

    /* Two passes: byte offsets 0 and 8 from pix, in that order. */
    for (offset = 0; offset < 16; offset += 8)
        ff_x264_deblock_v8_luma_intra_mmxext(pix + offset, stride, alpha, beta);
}
# elif !HAVE_YASM
# if !HAVE_YASM
/* Alias the interleave6 entry points to the generic *_misc_* converters,
 * passing 6 as the fourth argument (presumably the channel count - confirm
 * against the callee). Note that the 3dn2 alias also resolves to the 3dnow
 * variant of the misc converter. */
# define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
# define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
# define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
@ -2994,89 +2953,3 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
//ff_idct = just_return;
# endif
}
# if CONFIG_H264DSP
/**
 * Fill in the function pointers of an H264DSPContext with x86 SIMD routines.
 *
 * The flag word returned by mm_support() is tested against the FF_MM_* bits.
 * Assignments run from the baseline MMX routines towards the more specific
 * instruction sets, so a later assignment to the same slot overrides an
 * earlier one. Nothing is written when FF_MM_MMX is not set.
 *
 * @param c context whose function pointers are assigned
 */
void ff_h264dsp_init_x86(H264DSPContext *c)
{
    const int cpu_caps = mm_support();

    /* Every assignment below is conditional on MMX being reported. */
    if (!(cpu_caps & FF_MM_MMX))
        return;

    /* Baseline MMX: the DC-only IDCT slots reuse the full IDCT routines. */
    c->h264_idct_add        = ff_h264_idct_add_mmx;
    c->h264_idct_dc_add     = c->h264_idct_add;
    c->h264_idct8_add       = ff_h264_idct8_add_mmx;
    c->h264_idct8_dc_add    = c->h264_idct8_add;

    c->h264_idct_add16      = ff_h264_idct_add16_mmx;
    c->h264_idct8_add4      = ff_h264_idct8_add4_mmx;
    c->h264_idct_add8       = ff_h264_idct_add8_mmx;
    c->h264_idct_add16intra = ff_h264_idct_add16intra_mmx;

    if (cpu_caps & FF_MM_MMX2) {
        /* MMX2 overrides for the IDCT slots. */
        c->h264_idct_dc_add     = ff_h264_idct_dc_add_mmx2;
        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_mmx2;
        c->h264_idct_add16      = ff_h264_idct_add16_mmx2;
        c->h264_idct8_add4      = ff_h264_idct8_add4_mmx2;
        c->h264_idct_add8       = ff_h264_idct_add8_mmx2;
        c->h264_idct_add16intra = ff_h264_idct_add16intra_mmx2;

        /* Loop filters. */
        c->h264_v_loop_filter_luma         = h264_v_loop_filter_luma_mmx2;
        c->h264_h_loop_filter_luma         = h264_h_loop_filter_luma_mmx2;
        c->h264_v_loop_filter_chroma       = h264_v_loop_filter_chroma_mmx2;
        c->h264_h_loop_filter_chroma       = h264_h_loop_filter_chroma_mmx2;
        c->h264_v_loop_filter_chroma_intra = h264_v_loop_filter_chroma_intra_mmx2;
        c->h264_h_loop_filter_chroma_intra = h264_h_loop_filter_chroma_intra_mmx2;
        c->h264_loop_filter_strength       = h264_loop_filter_strength_mmx2;

        /* Weighted prediction, table slots 0..7. */
        c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_mmx2;
        c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_mmx2;
        c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_mmx2;
        c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_mmx2;
        c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_mmx2;
        c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_mmx2;
        c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_mmx2;
        c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_mmx2;

        /* Bi-weighted prediction, table slots 0..7. */
        c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_mmx2;
        c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_mmx2;
        c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_mmx2;
        c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_mmx2;
        c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_mmx2;
        c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_mmx2;
        c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_mmx2;
        c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_mmx2;
    }

    /* SSE2 8x8 IDCT; tested independently of the MMX2 bit. */
    if (cpu_caps & FF_MM_SSE2) {
        c->h264_idct8_add  = ff_h264_idct8_add_sse2;
        c->h264_idct8_add4 = ff_h264_idct8_add4_sse2;
    }

#if HAVE_YASM
    /* Routines that are only available when HAVE_YASM is set. */
    if (cpu_caps & FF_MM_MMX2) {
#if ARCH_X86_32
        c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
        c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
#endif

        if (cpu_caps & FF_MM_SSE2) {
            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_sse2;
            c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_sse2;

#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
            c->h264_v_loop_filter_luma       = ff_x264_deblock_v_luma_sse2;
            c->h264_h_loop_filter_luma       = ff_x264_deblock_h_luma_sse2;
            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
#endif

#if CONFIG_GPL
            c->h264_idct_add16      = ff_h264_idct_add16_sse2;
            c->h264_idct_add8       = ff_h264_idct_add8_sse2;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
#endif
        }

        /* SSSE3 replaces the two SSE2 bi-weight entries when available. */
        if (cpu_caps & FF_MM_SSSE3) {
            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_ssse3;
            c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_ssse3;
        }
    }
#endif
}
# endif /* CONFIG_H264DSP */