From 3b345d389be2d67017f904caa21713f53a8e8c90 Mon Sep 17 00:00:00 2001 From: James Almer Date: Wed, 27 Sep 2017 23:10:09 -0300 Subject: [PATCH] avutil/cpu: split flag checks per arch in av_cpu_max_align() Signed-off-by: James Almer --- libavutil/aarch64/cpu.c | 10 ++++++++++ libavutil/arm/cpu.c | 10 ++++++++++ libavutil/cpu.c | 39 ++++++++------------------------------- libavutil/cpu_internal.h | 5 +++++ libavutil/ppc/cpu.c | 12 ++++++++++++ libavutil/x86/cpu.c | 27 +++++++++++++++++++++++++++ 6 files changed, 72 insertions(+), 31 deletions(-) diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c index 8ef077aaea..cc641da576 100644 --- a/libavutil/aarch64/cpu.c +++ b/libavutil/aarch64/cpu.c @@ -26,3 +26,13 @@ int ff_get_cpu_flags_aarch64(void) AV_CPU_FLAG_NEON * HAVE_NEON | AV_CPU_FLAG_VFP * HAVE_VFP; } + +size_t ff_get_cpu_max_align_aarch64(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_NEON) + return 16; + + return 8; +} diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c index 3889ef011c..81e85e2525 100644 --- a/libavutil/arm/cpu.c +++ b/libavutil/arm/cpu.c @@ -158,3 +158,13 @@ int ff_get_cpu_flags_arm(void) } #endif + +size_t ff_get_cpu_max_align_arm(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_NEON) + return 16; + + return 8; +} diff --git a/libavutil/cpu.c b/libavutil/cpu.c index ab04494acf..c8401b8258 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -304,37 +304,14 @@ int av_cpu_count(void) size_t av_cpu_max_align(void) { - int av_unused flags = av_get_cpu_flags(); - -#if ARCH_ARM || ARCH_AARCH64 - if (flags & AV_CPU_FLAG_NEON) - return 16; -#elif ARCH_PPC - if (flags & (AV_CPU_FLAG_ALTIVEC | - AV_CPU_FLAG_VSX | - AV_CPU_FLAG_POWER8)) - return 16; -#elif ARCH_X86 - if (flags & (AV_CPU_FLAG_AVX2 | - AV_CPU_FLAG_AVX | - AV_CPU_FLAG_XOP | - AV_CPU_FLAG_FMA4 | - AV_CPU_FLAG_FMA3 | - AV_CPU_FLAG_AVXSLOW)) - return 32; - if (flags & (AV_CPU_FLAG_AESNI | - AV_CPU_FLAG_SSE42 | - AV_CPU_FLAG_SSE4 | - AV_CPU_FLAG_SSSE3 | - AV_CPU_FLAG_SSE3 | - AV_CPU_FLAG_SSE2 | - AV_CPU_FLAG_SSE | - AV_CPU_FLAG_ATOM | - AV_CPU_FLAG_SSSE3SLOW | - AV_CPU_FLAG_SSE3SLOW | - AV_CPU_FLAG_SSE2SLOW)) - return 16; -#endif + if (ARCH_AARCH64) + return ff_get_cpu_max_align_aarch64(); + if (ARCH_ARM) + return ff_get_cpu_max_align_arm(); + if (ARCH_PPC) + return ff_get_cpu_max_align_ppc(); + if (ARCH_X86) + return ff_get_cpu_max_align_x86(); return 8; } diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 6c352abe1b..b8bf1e5396 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -44,4 +44,9 @@ int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); +size_t ff_get_cpu_max_align_aarch64(void); +size_t ff_get_cpu_max_align_arm(void); +size_t ff_get_cpu_max_align_ppc(void); +size_t ff_get_cpu_max_align_x86(void); + #endif /* AVUTIL_CPU_INTERNAL_H */ diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c index 0f1e982624..7bb7cd813c 100644 --- a/libavutil/ppc/cpu.c +++ b/libavutil/ppc/cpu.c @@ -148,3 +148,15 @@ out: #endif /* HAVE_ALTIVEC */ return 0; } + +size_t ff_get_cpu_max_align_ppc(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & (AV_CPU_FLAG_ALTIVEC | + AV_CPU_FLAG_VSX | + AV_CPU_FLAG_POWER8)) + return 16; + + return 8; +} diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 3800a11ad8..f33088c8c7 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -233,3 +233,30 @@ int ff_get_cpu_flags_x86(void) return rval; } + +size_t ff_get_cpu_max_align_x86(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & (AV_CPU_FLAG_AVX2 | + AV_CPU_FLAG_AVX | + AV_CPU_FLAG_XOP | + AV_CPU_FLAG_FMA4 | + AV_CPU_FLAG_FMA3 | + AV_CPU_FLAG_AVXSLOW)) + return 32; + if (flags & (AV_CPU_FLAG_AESNI | + AV_CPU_FLAG_SSE42 | + AV_CPU_FLAG_SSE4 | + AV_CPU_FLAG_SSSE3 | + AV_CPU_FLAG_SSE3 | + AV_CPU_FLAG_SSE2 | + AV_CPU_FLAG_SSE | + AV_CPU_FLAG_ATOM | + AV_CPU_FLAG_SSSE3SLOW | + AV_CPU_FLAG_SSE3SLOW | + AV_CPU_FLAG_SSE2SLOW)) + return 16; + + return 8; +}