From eba586b0d9f0546c7c9c965edb71e7b29721217d Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 21 Mar 2011 23:32:40 -0400 Subject: [PATCH] Add a CPU flag for the Atom processor. The Atom has SSSE3 support, which is useful in many cases. However, the SSSE3 version of a function is sometimes slower than the SSE2 equivalent on the Atom, even though it is generally faster on other processors supporting SSSE3. This flag allows for selectively disabling certain SSSE3 functions on the Atom. --- libavutil/cpu.c | 1 + libavutil/cpu.h | 1 + libavutil/x86/cpu.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index eba067a91a..ddccd000bc 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -58,6 +58,7 @@ int main(void) cpu_flags & AV_CPU_FLAG_SSE3 ? "SSE3 " : "", cpu_flags & AV_CPU_FLAG_SSE3SLOW ? "SSE3(slow) " : "", cpu_flags & AV_CPU_FLAG_SSSE3 ? "SSSE3 " : "", + cpu_flags & AV_CPU_FLAG_ATOM ? "Atom " : "", cpu_flags & AV_CPU_FLAG_SSE4 ? "SSE4.1 " : "", cpu_flags & AV_CPU_FLAG_SSE42 ? "SSE4.2 " : "", cpu_flags & AV_CPU_FLAG_AVX ? 
"AVX " : "", diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 3a87fc0506..11ba368678 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -34,6 +34,7 @@ #define AV_CPU_FLAG_SSE3 0x0040 ///< Prescott SSE3 functions #define AV_CPU_FLAG_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster #define AV_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions +#define AV_CPU_FLAG_ATOM 0x10000000 ///< Atom processor, some SSSE3 instructions are slower #define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions #define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions #define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index c11956d3c3..4bc56912b5 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -135,8 +135,8 @@ int ff_get_cpu_flags_x86(void) } } - if (!strncmp(vendor.c, "GenuineIntel", 12) && - family == 6 && (model == 9 || model == 13 || model == 14)) { + if (!strncmp(vendor.c, "GenuineIntel", 12)) { + if (family == 6 && (model == 9 || model == 13 || model == 14)) { /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah") * theoretically support sse2, but it's usually slower than mmx, * so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled and @@ -145,6 +145,14 @@ int ff_get_cpu_flags_x86(void) * situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */ if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2; if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3; + } + /* The Atom processor has SSSE3 support, which is useful in many cases, + * but sometimes the SSSE3 version is slower than the SSE2 equivalent + * on the Atom, but is generally faster on other processors supporting + * SSSE3. This flag allows for selectively disabling certain SSSE3 + * functions on the Atom. */ + if (family == 6 && model == 28) + rval |= AV_CPU_FLAG_ATOM; } return rval;