From 1f6d86991f191568d45484f2b3740c2dcd0a7b45 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 3 Oct 2012 16:46:17 +0200 Subject: [PATCH 1/4] x86: Add YASM implementations of cpuid and xgetbv from x264 This allows detecting CPU features with builds that have neither gcc inline assembly nor the right compiler intrinsics enabled. --- libavutil/x86/Makefile | 3 +- libavutil/x86/cpu.c | 17 +++++++- libavutil/x86/cpu.h | 4 ++ libavutil/x86/cpuid.asm | 91 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 libavutil/x86/cpuid.asm diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 4546353669..3dd696c26a 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -1,4 +1,5 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ -YASM-OBJS += x86/float_dsp.o \ +YASM-OBJS += x86/cpuid.o \ + x86/float_dsp.o \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 5de60147c6..fb1dd299bc 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -22,10 +22,21 @@ #include #include + #include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_YASM + +#define cpuid(index, eax, ebx, ecx, edx) \ + ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx) + +#define xgetbv(index, eax, edx) \ + ff_cpu_xgetbv(index, &eax, &edx) + +#elif HAVE_INLINE_ASM + /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index, eax, ebx, ecx, edx) \ __asm__ volatile ( \ @@ -90,6 +101,10 @@ #define cpuid_test() 1 +#elif HAVE_YASM + +#define cpuid_test ff_cpu_cpuid_test + #elif HAVE_INLINE_ASM || HAVE_RWEFLAGS static int cpuid_test(void) diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index e14cb57416..e4f6f0bd5a 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -54,4 +54,8 @@ #define INLINE_AVX(flags) CPUEXT(flags, _INLINE, AVX) #define INLINE_FMA4(flags) CPUEXT(flags, _INLINE, FMA4) +void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx); +void ff_cpu_xgetbv(int op, int *eax, int *edx); +int ff_cpu_cpuid_test(void); + #endif /* AVUTIL_X86_CPU_H */ diff --git a/libavutil/x86/cpuid.asm b/libavutil/x86/cpuid.asm new file mode 100644 index 0000000000..d2ac1f01dc --- /dev/null +++ b/libavutil/x86/cpuid.asm @@ -0,0 +1,91 @@ +;***************************************************************************** +;* Copyright (C) 2005-2010 x264 project +;* +;* Authors: Loren Merritt +;* Jason Garrett-Glaser +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" + +SECTION .text + +;----------------------------------------------------------------------------- +; void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx) +;----------------------------------------------------------------------------- +cglobal cpu_cpuid, 5,7 + push rbx + push r4 + push r3 + push r2 + push r1 + mov eax, r0d + xor ecx, ecx + cpuid + pop r4 + mov [r4], eax + pop r4 + mov [r4], ebx + pop r4 + mov [r4], ecx + pop r4 + mov [r4], edx + pop rbx + RET + +;----------------------------------------------------------------------------- +; void ff_cpu_xgetbv(int op, int *eax, int *edx) +;----------------------------------------------------------------------------- +cglobal cpu_xgetbv, 3,7 + push r2 + push r1 + mov ecx, r0d + xgetbv + pop r4 + mov [r4], eax + pop r4 + mov [r4], edx + RET + +%if ARCH_X86_64 == 0 +;----------------------------------------------------------------------------- +; int ff_cpu_cpuid_test(void) +; return 0 if unsupported +;----------------------------------------------------------------------------- +cglobal cpu_cpuid_test + pushfd + push ebx + push ebp + push esi + push edi + pushfd + pop eax + mov ebx, eax + xor eax, 0x200000 + push eax + popfd + pushfd + pop eax + xor eax, ebx + pop edi + pop esi + pop ebp + pop ebx + popfd + ret +%endif From f6fbce761ea697f437cef721ec6711ffcbd1ac1f Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 3 Oct 2012 17:57:53 +0200 Subject: [PATCH 2/4] x86: Drop CPU detection intrinsics Now that there is CPU detection in YASM, there will always be one of inline or external assembly enabled, which obviates the need to fall back on CPU detection through compiler intrinsics. --- configure | 6 ------ libavutil/x86/cpu.c | 36 ------------------------------------ 2 files changed, 42 deletions(-) diff --git a/configure b/configure index 4c3ebdb281..cd72f36376 100755 --- a/configure +++ b/configure @@ -1138,7 +1138,6 @@ HAVE_LIST=" cbrtf closesocket cmov - cpuid cpunop dcbzl dev_bktr_ioctl_bt848_h @@ -1206,7 +1205,6 @@ HAVE_LIST=" rint round roundf - rweflags sched_getaffinity sdl sdl_video_size @@ -1250,7 +1248,6 @@ HAVE_LIST=" windows_h winsock2_h xform_asm - xgetbv xmm_clobbers " @@ -3070,10 +3067,7 @@ elif enabled sparc; then elif enabled x86; then - check_code ld immintrin.h "return __xgetbv(0)" && enable xgetbv - check_code ld intrin.h "int info[4]; __cpuid(info, 0)" && enable cpuid check_code ld intrin.h "__rdtsc()" && enable rdtsc - check_code ld intrin.h "unsigned int x = __readeflags()" && enable rweflags check_code ld mmintrin.h "_mm_empty()" && enable mm_empty diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index fb1dd299bc..5b658d1bed 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -45,35 +45,9 @@ "xchg %%"REG_b", %%"REG_S \ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ : "0" (index)) -#elif HAVE_CPUID -#include -#define cpuid(index, eax, ebx, ecx, edx) \ - do { \ - int info[4]; \ - __cpuid(info, index); \ - eax = info[0]; \ - ebx = info[1]; \ - ecx = info[2]; \ - edx = info[3]; \ - } while (0) -#endif /* HAVE_CPUID */ - -#if HAVE_INLINE_ASM #define xgetbv(index, eax, edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) -#elif HAVE_XGETBV -#include - -#define xgetbv(index, eax, edx) \ - do { \ - uint64_t res = __xgetbv(index); \ - eax = res; \ - edx = res >> 32; \ - } while (0) -#endif /* HAVE_XGETBV */ - -#if HAVE_INLINE_ASM #define get_eflags(x) \ __asm__ volatile ("pushfl \n" \ @@ -85,16 +59,6 @@ "popfl \n" \ :: "r"(x)) -#elif HAVE_RWEFLAGS - -#include - -#define get_eflags(x) \ - x = __readeflags() - -#define set_eflags(x) \ - __writeeflags(x) - #endif /* HAVE_INLINE_ASM */ #if ARCH_X86_64 From a7329e5fc22433dfeaf7af22fb40fe3cada21385 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 5 Sep 2012 20:49:28 +0200 Subject: [PATCH 3/4] x86: get_cpu_flags: add necessary ifdefs around function body ff_get_cpu_flags_x86() requires cpuid(), which is conditionally defined elsewhere in the file. Surrounding the function body with ifdefs allows building even when cpuid is not defined. An empty cpuflags mask is returned in this case. --- libavutil/x86/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 5b658d1bed..dab2cac0cb 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -89,6 +89,9 @@ static int cpuid_test(void) int ff_get_cpu_flags_x86(void) { int rval = 0; + +#ifdef cpuid + int eax, ebx, ecx, edx; int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0; int family = 0, model = 0; @@ -192,5 +195,7 @@ int ff_get_cpu_flags_x86(void) rval |= AV_CPU_FLAG_ATOM; } +#endif /* cpuid */ + return rval; } From f6f7d1504134683c435e2c7d804279d982e52bb4 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 3 Oct 2012 16:25:14 -0700 Subject: [PATCH 4/4] h264: don't touch H264Context->ref_count[] during MB decoding The variable is copied to subsequent threads at the same time, so this may cause wrong ref_count[] values to be copied to subsequent threads. This bug was found using TSAN. Signed-off-by: Luca Barbato --- libavcodec/h264_cabac.c | 41 ++++++++++++++++------------------------- libavcodec/h264_cavlc.c | 33 +++++++++++++-------------------- 2 files changed, 29 insertions(+), 45 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index f2fea5d3f2..92c1c03740 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -2005,11 +2005,6 @@ decode_intra_mb: return 0; } - if(MB_MBAFF){ - h->ref_count[0] <<= 1; - h->ref_count[1] <<= 1; - } - fill_decode_caches(h, mb_type); if( IS_INTRA( mb_type ) ) { @@ -2078,10 +2073,11 @@ decode_intra_mb: for( i = 0; i < 4; i++ ) { if(IS_DIRECT(h->sub_mb_type[i])) continue; if(IS_DIR(h->sub_mb_type[i], 0, list)){ - if( h->ref_count[list] > 1 ){ + int rc = h->ref_count[list] << MB_MBAFF; + if (rc > 1) { ref[list][i] = decode_cabac_mb_ref( h, list, 4*i ); - if(ref[list][i] >= (unsigned)h->ref_count[list]){ - av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]); + if (ref[list][i] >= (unsigned) rc) { + av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], rc); return -1; } }else @@ -2163,11 +2159,11 @@ decode_intra_mb: if(IS_16X16(mb_type)){ for(list=0; listlist_count; list++){ if(IS_DIR(mb_type, 0, list)){ - int ref; - if(h->ref_count[list] > 1){ + int ref, rc = h->ref_count[list] << MB_MBAFF; + if (rc > 1) { ref= decode_cabac_mb_ref(h, list, 0); - if(ref >= (unsigned)h->ref_count[list]){ - av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); + if (ref >= (unsigned) rc) { + av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc); return -1; } }else @@ -2191,11 +2187,11 @@ decode_intra_mb: for(list=0; listlist_count; list++){ for(i=0; i<2; i++){ if(IS_DIR(mb_type, i, list)){ - int ref; - if(h->ref_count[list] > 1){ + int ref, rc = h->ref_count[list] << MB_MBAFF; + if (rc > 1) { ref= decode_cabac_mb_ref( h, list, 8*i ); - if(ref >= (unsigned)h->ref_count[list]){ - av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); + if (ref >= (unsigned) rc) { + av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc); return -1; } }else @@ -2226,11 +2222,11 @@ decode_intra_mb: for(list=0; listlist_count; list++){ for(i=0; i<2; i++){ if(IS_DIR(mb_type, i, list)){ //FIXME optimize - int ref; - if(h->ref_count[list] > 1){ + int ref, rc = h->ref_count[list] << MB_MBAFF; + if (rc > 1) { ref= decode_cabac_mb_ref( h, list, 4*i ); - if(ref >= (unsigned)h->ref_count[list]){ - av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); + if (ref >= (unsigned) rc) { + av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc); return -1; } }else @@ -2403,10 +2399,5 @@ decode_intra_mb: s->current_picture.f.qscale_table[mb_xy] = s->qscale; write_back_non_zero_count(h); - if(MB_MBAFF){ - h->ref_count[0] >>= 1; - h->ref_count[1] >>= 1; - } - return 0; } diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index c4159e241c..8996057c31 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -784,11 +784,6 @@ decode_intra_mb: return 0; } - if(MB_MBAFF){ - h->ref_count[0] <<= 1; - h->ref_count[1] <<= 1; - } - fill_decode_neighbors(h, mb_type); fill_decode_caches(h, mb_type); @@ -868,7 +863,7 @@ decode_intra_mb: } for(list=0; listlist_count; list++){ - int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; + int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF; for(i=0; i<4; i++){ if(IS_DIRECT(h->sub_mb_type[i])) continue; if(IS_DIR(h->sub_mb_type[i], 0, list)){ @@ -948,13 +943,14 @@ decode_intra_mb: for(list=0; listlist_count; list++){ unsigned int val; if(IS_DIR(mb_type, 0, list)){ - if(h->ref_count[list]==1){ + int rc = h->ref_count[list] << MB_MBAFF; + if (rc == 1) { val= 0; - }else if(h->ref_count[list]==2){ + } else if (rc == 2) { val= get_bits1(&s->gb)^1; }else{ val= get_ue_golomb_31(&s->gb); - if(val >= h->ref_count[list]){ + if (val >= rc) { av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); return -1; } @@ -978,13 +974,14 @@ decode_intra_mb: for(i=0; i<2; i++){ unsigned int val; if(IS_DIR(mb_type, i, list)){ - if(h->ref_count[list] == 1){ + int rc = h->ref_count[list] << MB_MBAFF; + if (rc == 1) { val= 0; - }else if(h->ref_count[list] == 2){ + } else if (rc == 2) { val= get_bits1(&s->gb)^1; }else{ val= get_ue_golomb_31(&s->gb); - if(val >= h->ref_count[list]){ + if (val >= rc) { av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); return -1; } @@ -1015,13 +1012,14 @@ decode_intra_mb: for(i=0; i<2; i++){ unsigned int val; if(IS_DIR(mb_type, i, list)){ //FIXME optimize - if(h->ref_count[list]==1){ + int rc = h->ref_count[list] << MB_MBAFF; + if (rc == 1) { val= 0; - }else if(h->ref_count[list]==2){ + } else if (rc == 2) { val= get_bits1(&s->gb)^1; }else{ val= get_ue_golomb_31(&s->gb); - if(val >= h->ref_count[list]){ + if (val >= rc) { av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); return -1; } @@ -1180,10 +1178,5 @@ decode_intra_mb: s->current_picture.f.qscale_table[mb_xy] = s->qscale; write_back_non_zero_count(h); - if(MB_MBAFF){ - h->ref_count[0] >>= 1; - h->ref_count[1] >>= 1; - } - return 0; }