@ -22,6 +22,10 @@
# ifdef USE_FASTMEMCPY
# include "fastmemcpy.h"
# endif
# ifdef HAVE_MMX
# include "i386/mmx.h"
# endif
/* XXX: totally non optimized */
static void yuv422_to_yuv420p ( UINT8 * lum , UINT8 * cb , UINT8 * cr ,
@ -762,29 +766,121 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
return 0 ;
}
# ifdef HAVE_MMX
# define DEINT_INPLACE_LINE_LUM \
movd_m2r ( lum_m4 [ 0 ] , mm0 ) ; \
movd_m2r ( lum_m3 [ 0 ] , mm1 ) ; \
movd_m2r ( lum_m2 [ 0 ] , mm2 ) ; \
movd_m2r ( lum_m1 [ 0 ] , mm3 ) ; \
movd_m2r ( lum [ 0 ] , mm4 ) ; \
punpcklbw_r2r ( mm7 , mm0 ) ; \
movd_r2m ( mm2 , lum_m4 [ 0 ] ) ; \
punpcklbw_r2r ( mm7 , mm1 ) ; \
punpcklbw_r2r ( mm7 , mm2 ) ; \
punpcklbw_r2r ( mm7 , mm3 ) ; \
punpcklbw_r2r ( mm7 , mm4 ) ; \
paddw_r2r ( mm3 , mm1 ) ; \
psllw_i2r ( 1 , mm2 ) ; \
paddw_r2r ( mm4 , mm0 ) ; \
psllw_i2r ( 2 , mm1 ) ; \
paddw_r2r ( mm6 , mm2 ) ; \
paddw_r2r ( mm2 , mm1 ) ; \
psubusw_r2r ( mm0 , mm1 ) ; \
psrlw_i2r ( 3 , mm1 ) ; \
packuswb_r2r ( mm7 , mm1 ) ; \
movd_r2m ( mm1 , lum_m2 [ 0 ] ) ;
# define DEINT_LINE_LUM \
movd_m2r ( lum_m4 [ 0 ] , mm0 ) ; \
movd_m2r ( lum_m3 [ 0 ] , mm1 ) ; \
movd_m2r ( lum_m2 [ 0 ] , mm2 ) ; \
movd_m2r ( lum_m1 [ 0 ] , mm3 ) ; \
movd_m2r ( lum [ 0 ] , mm4 ) ; \
punpcklbw_r2r ( mm7 , mm0 ) ; \
punpcklbw_r2r ( mm7 , mm1 ) ; \
punpcklbw_r2r ( mm7 , mm2 ) ; \
punpcklbw_r2r ( mm7 , mm3 ) ; \
punpcklbw_r2r ( mm7 , mm4 ) ; \
paddw_r2r ( mm3 , mm1 ) ; \
psllw_i2r ( 1 , mm2 ) ; \
paddw_r2r ( mm4 , mm0 ) ; \
psllw_i2r ( 2 , mm1 ) ; \
paddw_r2r ( mm6 , mm2 ) ; \
paddw_r2r ( mm2 , mm1 ) ; \
psubusw_r2r ( mm0 , mm1 ) ; \
psrlw_i2r ( 3 , mm1 ) ; \
packuswb_r2r ( mm7 , mm1 ) ; \
movd_r2m ( mm1 , dst [ 0 ] ) ;
# endif
/* filter parameters: [-1 4 2 4 -1] // 8 */
static void deinterlace_line ( UINT8 * dst , UINT8 * src , int src_wrap ,
static void deinterlace_line ( UINT8 * dst , UINT8 * lum_m4 , UINT8 * lum_m3 , UINT8 * lum_m2 , UINT8 * lum_m1 , UINT8 * lum ,
int size )
{
# ifndef HAVE_MMX
UINT8 * cm = cropTbl + MAX_NEG_CROP ;
int sum ;
UINT8 * s ;
for ( ; size > 0 ; size - - ) {
s = src ;
sum = - s [ 0 ] ;
s + = src_wrap ;
sum + = s [ 0 ] < < 2 ;
s + = src_wrap ;
sum + = s [ 0 ] < < 1 ;
s + = src_wrap ;
sum + = s [ 0 ] < < 2 ;
s + = src_wrap ;
sum + = - s [ 0 ] ;
sum = - lum_m4 [ 0 ] ;
sum + = lum_m3 [ 0 ] < < 2 ;
sum + = lum_m2 [ 0 ] < < 1 ;
sum + = lum_m1 [ 0 ] < < 2 ;
sum + = - lum [ 0 ] ;
dst [ 0 ] = cm [ ( sum + 4 ) > > 3 ] ;
lum_m4 + + ;
lum_m3 + + ;
lum_m2 + + ;
lum_m1 + + ;
lum + + ;
dst + + ;
src + + ;
}
# else
for ( ; size > 3 ; size - = 4 ) {
DEINT_LINE_LUM
lum_m4 + = 4 ;
lum_m3 + = 4 ;
lum_m2 + = 4 ;
lum_m1 + = 4 ;
lum + = 4 ;
dst + = 4 ;
}
# endif
}
static void deinterlace_line_inplace ( UINT8 * lum_m4 , UINT8 * lum_m3 , UINT8 * lum_m2 , UINT8 * lum_m1 , UINT8 * lum ,
int size )
{
# ifndef HAVE_MMX
UINT8 * cm = cropTbl + MAX_NEG_CROP ;
int sum ;
for ( ; size > 0 ; size - - ) {
sum = - lum_m4 [ 0 ] ;
sum + = lum_m3 [ 0 ] < < 2 ;
sum + = lum_m2 [ 0 ] < < 1 ;
lum_m4 [ 0 ] = lum_m2 [ 0 ] ;
sum + = lum_m1 [ 0 ] < < 2 ;
sum + = - lum [ 0 ] ;
lum_m2 [ 0 ] = cm [ ( sum + 4 ) > > 3 ] ;
lum_m4 + + ;
lum_m3 + + ;
lum_m2 + + ;
lum_m1 + + ;
lum + + ;
}
# else
for ( ; size > 3 ; size - = 4 ) {
DEINT_INPLACE_LINE_LUM
lum_m4 + = 4 ;
lum_m3 + = 4 ;
lum_m2 + = 4 ;
lum_m1 + = 4 ;
lum + = 4 ;
}
# endif
}
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
@ -794,45 +890,58 @@ static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
UINT8 * src1 , int src_wrap ,
int width , int height )
{
UINT8 * src , * ptr ;
int y , y1 , i ;
UINT8 * buf ;
buf = ( UINT8 * ) av_malloc ( 5 * width ) ;
src = src1 ;
for ( y = 0 ; y < height ; y + = 2 ) {
/* copy top field line */
memcpy ( dst , src , width ) ;
UINT8 * src_m2 , * src_m1 , * src_0 , * src_p1 , * src_p2 ;
int y ;
src_m2 = src1 ;
src_m1 = src1 ;
src_0 = & src_m1 [ src_wrap ] ;
src_p1 = & src_0 [ src_wrap ] ;
src_p2 = & src_p1 [ src_wrap ] ;
for ( y = 0 ; y < ( height - 2 ) ; y + = 2 ) {
memcpy ( dst , src_m1 , width ) ;
dst + = dst_wrap ;
src + = ( 1 - 2 ) * src_wrap ;
y1 = y - 2 ;
if ( y1 > = 0 & & ( y1 + 4 ) < height ) {
/* fast case : no edges */
deinterlace_line ( dst , src , src_wrap , width ) ;
} else {
/* in order to use the same function, we use an intermediate buffer */
ptr = buf ;
for ( i = 0 ; i < 5 ; i + + ) {
if ( y1 < 0 )
memcpy ( ptr , src1 , width ) ;
else if ( y1 > = height )
memcpy ( ptr , src1 + ( height - 1 ) * src_wrap , width ) ;
else
memcpy ( ptr , src1 + y1 * src_wrap , width ) ;
y1 + + ;
ptr + = width ;
}
deinterlace_line ( dst , buf , width , width ) ;
}
deinterlace_line ( dst , src_m2 , src_m1 , src_0 , src_p1 , src_p2 , width ) ;
src_m2 = src_0 ;
src_m1 = src_p1 ;
src_0 = src_p2 ;
src_p1 + = 2 * src_wrap ;
src_p2 + = 2 * src_wrap ;
dst + = dst_wrap ;
src + = ( 2 + 1 ) * src_wrap ;
}
memcpy ( dst , src_m1 , width ) ;
dst + = dst_wrap ;
/* do last line */
deinterlace_line ( dst , src_m2 , src_m1 , src_0 , src_0 , src_0 , width ) ;
}
static void deinterlace_bottom_field_inplace ( UINT8 * src1 , int src_wrap ,
int width , int height )
{
UINT8 * src_m1 , * src_0 , * src_p1 , * src_p2 ;
int y ;
UINT8 * buf ;
buf = ( UINT8 * ) av_malloc ( width ) ;
src_m1 = src1 ;
memcpy ( buf , src_m1 , width ) ;
src_0 = & src_m1 [ src_wrap ] ;
src_p1 = & src_0 [ src_wrap ] ;
src_p2 = & src_p1 [ src_wrap ] ;
for ( y = 0 ; y < ( height - 2 ) ; y + = 2 ) {
deinterlace_line_inplace ( buf , src_m1 , src_0 , src_p1 , src_p2 , width ) ;
src_m1 = src_p1 ;
src_0 = src_p2 ;
src_p1 + = 2 * src_wrap ;
src_p2 + = 2 * src_wrap ;
}
/* do last line */
deinterlace_line_inplace ( buf , src_m1 , src_0 , src_0 , src_0 , width ) ;
av_free ( buf ) ;
}
/* deinterlace, return -1 if format not handled */
/* deinterlace - if not supported return -1 */
int avpicture_deinterlace ( AVPicture * dst , AVPicture * src ,
int pix_fmt , int width , int height )
{
@ -842,9 +951,22 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
pix_fmt ! = PIX_FMT_YUV422P & &
pix_fmt ! = PIX_FMT_YUV444P )
return - 1 ;
if ( ( width & 1 ) ! = 0 | | ( height & 3 ) ! = 0 )
if ( ( width & 3 ) ! = 0 | | ( height & 3 ) ! = 0 )
return - 1 ;
# ifdef HAVE_MMX
{
mmx_t rounder ;
rounder . uw [ 0 ] = 4 ;
rounder . uw [ 1 ] = 4 ;
rounder . uw [ 2 ] = 4 ;
rounder . uw [ 3 ] = 4 ;
pxor_r2r ( mm7 , mm7 ) ;
movq_m2r ( rounder , mm6 ) ;
}
# endif
for ( i = 0 ; i < 3 ; i + + ) {
if ( i = = 1 ) {
switch ( pix_fmt ) {
@ -859,10 +981,18 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
break ;
}
}
deinterlace_bottom_field ( dst - > data [ i ] , dst - > linesize [ i ] ,
if ( src = = dst ) {
deinterlace_bottom_field_inplace ( src - > data [ i ] , src - > linesize [ i ] ,
width , height ) ;
} else {
deinterlace_bottom_field ( dst - > data [ i ] , dst - > linesize [ i ] ,
src - > data [ i ] , src - > linesize [ i ] ,
width , height ) ;
}
}
# ifdef HAVE_MMX
emms ( ) ;
# endif
return 0 ;
}