@ -41,13 +41,14 @@ void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf,
const uint8_t * src1 , const uint8_t * src2 ) ;
# if HAVE_YASM
static void gradfun_filter_line ( uint8_t * dst , const uint8_t * src , const uint16_t * dc ,
int width , int thresh , const uint16_t * dithers ,
int alignment )
static void gradfun_filter_line_mmxext ( uint8_t * dst , const uint8_t * src ,
const uint16_t * dc ,
int width , int thresh ,
const uint16_t * dithers )
{
intptr_t x ;
if ( width & alignment ) {
x = width & ~ alignment ;
if ( width & 3 ) {
x = width & ~ 3 ;
ff_gradfun_filter_line_c ( dst + x , src + x , dc + x / 2 ,
width - x , thresh , dithers ) ;
width = x ;
@ -57,19 +58,21 @@ static void gradfun_filter_line(uint8_t *dst, const uint8_t *src, const uint16_t
thresh , dithers ) ;
}
/*
 * Thin wrapper: forwards dst, src, dc, width, thresh and dithers unchanged to
 * gradfun_filter_line() and passes 3 as the final (alignment) argument.
 *
 * NOTE(review): this text looks like a diff whose +/- markers were stripped;
 * another header for the same function name appears earlier in the fragment,
 * so this wrapper may be the pre-change version -- confirm against the patch.
 */
static void gradfun_filter_line_mmxext ( uint8_t * dst , const uint8_t * src ,
const uint16_t * dc ,
int width , int thresh ,
const uint16_t * dithers )
{
gradfun_filter_line ( dst , src , dc , width , thresh , dithers , 3 ) ;
}
/*
 * Wrapper around ff_gradfun_filter_line_ssse3().
 *
 * When width is not a multiple of 8, the trailing (width & 7) elements are
 * handed to ff_gradfun_filter_line_c() and width is reduced to the
 * multiple-of-8 prefix before ff_gradfun_filter_line_ssse3() is called.
 */
static void gradfun_filter_line_ssse3 ( uint8_t * dst , const uint8_t * src , const uint16_t * dc ,
int width , int thresh ,
const uint16_t * dithers )
{
// NOTE(review): the call below and the inline sequence after it both process
// the same line. This fragment looks like a diff with its +/- markers
// stripped, so this call is probably the line being replaced -- confirm.
gradfun_filter_line ( dst , src , dc , width , thresh , dithers , 7 ) ;
intptr_t x ;
// Width is not a multiple of 8: split off the tail starting at x.
if ( width & 7 ) {
// could be 10% faster if I somehow eliminated this
x = width & ~ 7 ;
// The tail starts at offset x in dst/src; dc is advanced by x / 2 elements.
ff_gradfun_filter_line_c ( dst + x , src + x , dc + x / 2 ,
width - x , thresh , dithers ) ;
// Only the multiple-of-8 prefix is left for the call below.
width = x ;
}
// The callee receives x = -width together with pointers advanced past the
// remaining span (dc by width / 2); presumably it counts x up towards zero
// -- TODO confirm against the assembly.
x = - width ;
ff_gradfun_filter_line_ssse3 ( x , dst + width , src + width , dc + width / 2 ,
thresh , dithers ) ;
}
static void gradfun_blur_line_sse2 ( uint16_t * dc , uint16_t * buf , const uint16_t * buf1 ,