@ -26,16 +26,15 @@
# include "libavutil/cpu.h"
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
# define cpuid(index,eax,ebx,ecx,edx)\
__asm__ volatile \
( " mov %% " REG_b " , %% " REG_S " \n \t " \
" cpuid \n \t " \
" xchg %% " REG_b " , %% " REG_S \
: " =a " ( eax ) , " =S " ( ebx ) , \
" =c " ( ecx ) , " =d " ( edx ) \
: " 0 " ( index ) ) ;
# define xgetbv(index,eax,edx) \
# define cpuid(index, eax, ebx, ecx, edx) \
__asm__ volatile ( \
" mov %% " REG_b " , %% " REG_S " \n \t " \
" cpuid \n \t " \
" xchg %% " REG_b " , %% " REG_S \
: " =a " ( eax ) , " =S " ( ebx ) , " =c " ( ecx ) , " =d " ( edx ) \
: " 0 " ( index ) )
# define xgetbv(index, eax, edx) \
__asm__ ( " .byte 0x0f, 0x01, 0xd0 " : " =a " ( eax ) , " =d " ( edx ) : " c " ( index ) )
/* Function to test if multimedia instructions are supported... */
@ -43,8 +42,8 @@ int ff_get_cpu_flags_x86(void)
{
int rval = 0 ;
int eax , ebx , ecx , edx ;
int max_std_level , max_ext_level , std_caps = 0 , ext_caps = 0 ;
int family = 0 , model = 0 ;
int max_std_level , max_ext_level , std_caps = 0 , ext_caps = 0 ;
int family = 0 , model = 0 ;
union { int i [ 3 ] ; char c [ 12 ] ; } vendor ;
# if ARCH_X86_32
@ -79,19 +78,20 @@ int ff_get_cpu_flags_x86(void)
vendor . i [ 1 ] = edx ;
vendor . i [ 2 ] = ecx ;
if ( max_std_level > = 1 ) {
if ( max_std_level > = 1 ) {
cpuid ( 1 , eax , ebx , ecx , std_caps ) ;
family = ( ( eax > > 8 ) & 0xf ) + ( ( eax > > 20 ) & 0xff ) ;
model = ( ( eax > > 4 ) & 0xf ) + ( ( eax > > 12 ) & 0xf0 ) ;
family = ( ( eax > > 8 ) & 0xf ) + ( ( eax > > 20 ) & 0xff ) ;
model = ( ( eax > > 4 ) & 0xf ) + ( ( eax > > 12 ) & 0xf0 ) ;
if ( std_caps & ( 1 < < 15 ) )
rval | = AV_CPU_FLAG_CMOV ;
if ( std_caps & ( 1 < < 23 ) )
if ( std_caps & ( 1 < < 23 ) )
rval | = AV_CPU_FLAG_MMX ;
if ( std_caps & ( 1 < < 25 ) )
rval | = AV_CPU_FLAG_MMX2
if ( std_caps & ( 1 < < 25 ) )
rval | = AV_CPU_FLAG_MMX2 ;
# if HAVE_SSE
| AV_CPU_FLAG_SSE ;
if ( std_caps & ( 1 < < 26 ) )
if ( std_caps & ( 1 < < 25 ) )
rval | = AV_CPU_FLAG_SSE ;
if ( std_caps & ( 1 < < 26 ) )
rval | = AV_CPU_FLAG_SSE2 ;
if ( ecx & 1 )
rval | = AV_CPU_FLAG_SSE3 ;
@ -111,20 +111,19 @@ int ff_get_cpu_flags_x86(void)
}
# endif
# endif
;
}
cpuid ( 0x80000000 , max_ext_level , ebx , ecx , edx ) ;
if ( max_ext_level > = 0x80000001 ) {
if ( max_ext_level > = 0x80000001 ) {
cpuid ( 0x80000001 , eax , ebx , ecx , ext_caps ) ;
if ( ext_caps & ( 1U < < 31 ) )
if ( ext_caps & ( 1U < < 31 ) )
rval | = AV_CPU_FLAG_3DNOW ;
if ( ext_caps & ( 1 < < 30 ) )
if ( ext_caps & ( 1 < < 30 ) )
rval | = AV_CPU_FLAG_3DNOWEXT ;
if ( ext_caps & ( 1 < < 23 ) )
if ( ext_caps & ( 1 < < 23 ) )
rval | = AV_CPU_FLAG_MMX ;
if ( ext_caps & ( 1 < < 22 ) )
if ( ext_caps & ( 1 < < 22 ) )
rval | = AV_CPU_FLAG_MMX2 ;
/* Allow for selectively disabling SSE2 functions on AMD processors
@ -151,14 +150,17 @@ int ff_get_cpu_flags_x86(void)
if ( ! strncmp ( vendor . c , " GenuineIntel " , 12 ) ) {
if ( family = = 6 & & ( model = = 9 | | model = = 13 | | model = = 14 ) ) {
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
* theoretically support sse2 , but it ' s usually slower than mmx ,
* so let ' s just pretend they don ' t . AV_CPU_FLAG_SSE2 is disabled and
* AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless
* explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW . The same
* situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW . */
if ( rval & AV_CPU_FLAG_SSE2 ) rval ^ = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2 ;
if ( rval & AV_CPU_FLAG_SSE3 ) rval ^ = AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3 ;
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
* 6 / 14 ( core1 " yonah " ) theoretically support sse2 , but it ' s
* usually slower than mmx , so let ' s just pretend they don ' t .
* AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
* enabled so that SSE2 is not used unless explicitly enabled
* by checking AV_CPU_FLAG_SSE2SLOW . The same situation
* applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW . */
if ( rval & AV_CPU_FLAG_SSE2 )
rval ^ = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2 ;
if ( rval & AV_CPU_FLAG_SSE3 )
rval ^ = AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3 ;
}
/* The Atom processor has SSSE3 support, which is useful in many cases,
* but sometimes the SSSE3 version is slower than the SSE2 equivalent