From 711781d7a1714ea4eb0217eb1ba04811978c43d1 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 11 Dec 2015 14:06:38 +0100 Subject: [PATCH] x86: checkasm: check for or handle missing cleanup after MMX instructions Not every asm routine is expected to clear the MMX state after returning. It is however a requisite for testing floating point code in checkasm. Annotate functions requiring cleanup with declare_func_emms() and issue emms after the call. The remaining functions are checked for having a cleared MMX state after return. --- tests/checkasm/checkasm.h | 16 ++++++- tests/checkasm/h264pred.c | 8 ++-- tests/checkasm/h264qpel.c | 2 +- tests/checkasm/x86/checkasm.asm | 78 ++++++++++++++++++++++++--------- 4 files changed, 78 insertions(+), 26 deletions(-) diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index c1206e7f1e..6fc30ca0a2 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -26,6 +26,7 @@ #include #include "config.h" #include "libavutil/avstring.h" +#include "libavutil/cpu.h" #include "libavutil/lfg.h" #include "libavutil/timer.h" @@ -54,6 +55,7 @@ static av_unused void *func_ref, *func_new; /* Declare the function prototype. The first argument is the return value, the remaining * arguments are the function parameters. Naming parameters is optional. */ #define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) +#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) /* Indicate that the current test has failed */ #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) @@ -65,8 +67,12 @@ static av_unused void *func_ref, *func_new; #define call_ref(...) 
((func_type *)func_ref)(__VA_ARGS__) #if ARCH_X86 && HAVE_YASM -/* Verifies that clobbered callee-saved registers are properly saved and restored */ +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and that either no MMX registers are touched or emms is issued */ void checkasm_checked_call(void *func, ...); +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and issues emms for asm functions which are not required to do so */ +void checkasm_checked_call_emms(void *func, ...); #if ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. @@ -81,16 +87,24 @@ void checkasm_checked_call(void *func, ...); void checkasm_stack_clobber(uint64_t clobber, ...); #define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) \ + ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \ + ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ + (void *)checkasm_checked_call; #define CLOB (UINT64_C(0xdeadbeefdeadbeef)) #define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) #elif ARCH_X86_32 #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \ + ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ + (void *)checkasm_checked_call; #define call_new(...) checked_call(func_new, __VA_ARGS__) #endif #else #define declare_new(ret, ...) +#define declare_new_emms(cpu_flags, ret, ...) /* Call the function */ #define call_new(...) 
((func_type *)func_new)(__VA_ARGS__) #endif diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c index a1ee720fae..6dffa3484d 100644 --- a/tests/checkasm/h264pred.c +++ b/tests/checkasm/h264pred.c @@ -144,7 +144,7 @@ static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, if (chroma_format == 1) { uint8_t *topright = buf0 + 2*16; int pred_mode; - declare_func(void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 15; pred_mode++) { if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { @@ -163,7 +163,7 @@ static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, int codec, int chroma_format, int bit_depth) { int pred_mode; - declare_func(void, uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 11; pred_mode++) { if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? 
"8x16" : "8x8", @@ -183,7 +183,7 @@ static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, { if (chroma_format == 1) { int pred_mode; - declare_func(void, uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 9; pred_mode++) { if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { @@ -203,7 +203,7 @@ static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, { if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { int pred_mode; - declare_func(void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 12; pred_mode++) { if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { diff --git a/tests/checkasm/h264qpel.c b/tests/checkasm/h264qpel.c index 27bcc97cfc..d92f223ba1 100644 --- a/tests/checkasm/h264qpel.c +++ b/tests/checkasm/h264qpel.c @@ -55,7 +55,7 @@ void checkasm_check_h264qpel(void) LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]); H264QpelContext h; int op, bit_depth, i, j; - declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); for (op = 0; op < 2; op++) { qpel_mc_func (*tab)[16] = op ? 
h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab; diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm index 94b19b66f2..147d7a7497 100644 --- a/tests/checkasm/x86/checkasm.asm +++ b/tests/checkasm/x86/checkasm.asm @@ -26,6 +26,7 @@ SECTION_RODATA error_message: db "failed to preserve register", 0 +error_message_emms: db "failed to issue emms", 0 %if ARCH_X86_64 ; just random numbers to reduce the chance of incidental match @@ -83,11 +84,22 @@ cglobal stack_clobber, 1,2 DECLARE_REG_TMP 7 %endif +%macro report_fail 1 + mov r9, rax + mov r10, rdx + lea r0, [%1] + xor eax, eax + call fail_func + mov rdx, r10 + mov rax, r9 +%endmacro + ;----------------------------------------------------------------------------- ; void checkasm_checked_call(void *func, ...) ;----------------------------------------------------------------------------- INIT_XMM -cglobal checked_call, 2,15,16,max_args*8+8 +%macro check_call 0-1 +cglobal checked_call%1, 2,15,16,max_args*8+8 mov t0, r0 ; All arguments have been pushed on the stack instead of registers in order to @@ -154,16 +166,22 @@ cglobal checked_call, 2,15,16,max_args*8+8 ; Call fail_func() with a descriptive message to mark it as a failure ; if the called function didn't preserve all callee-saved registers. ; Save the return value located in rdx:rax first to prevent clobbering. 
- jz .ok - mov r9, rax - mov r10, rdx - lea r0, [error_message] - xor eax, eax - call fail_func - mov rdx, r10 - mov rax, r9 -.ok: + jz .clobber_ok + report_fail error_message +.clobber_ok: +%ifnid %1, _emms + fstenv [rsp] + mov r9h, [rsp + 8] + add r9h, 1 + jz .emms_ok + report_fail error_message_emms + emms +.emms_ok: +%else + emms +%endif RET +%endmacro %else @@ -173,10 +191,21 @@ cglobal checked_call, 2,15,16,max_args*8+8 %define n5 dword 0xb78d0d1d %define n6 dword 0x33627ba7 +%macro report_fail 1 + mov r3, eax + mov r4, edx + lea r0, [%1] + mov [esp], r0 + call fail_func + mov edx, r4 + mov eax, r3 +%endmacro + +%macro check_call 0-1 ;----------------------------------------------------------------------------- ; void checkasm_checked_call(void *func, ...) ;----------------------------------------------------------------------------- -cglobal checked_call, 1,7 +cglobal checked_call%1, 1,7 mov r3, n3 mov r4, n4 mov r5, n5 @@ -192,16 +221,25 @@ cglobal checked_call, 1,7 or r3, r4 or r5, r6 or r3, r5 - jz .ok - mov r3, eax - mov r4, edx - lea r0, [error_message] - mov [esp], r0 - call fail_func - mov edx, r4 - mov eax, r3 -.ok: + jz .clobber_ok + report_fail error_message +.clobber_ok: +%ifnid %1, _emms + fstenv [rsp] + mov r3h, [rsp + 8] + add r3h, 1 + jz .emms_ok + report_fail error_message_emms + emms +.emms_ok: +%else + emms +%endif add esp, max_args*4 REP_RET +%endmacro %endif ; ARCH_X86_64 + +check_call +check_call _emms