/*
 * NOTE(review): what follows is a git unified diff whose line breaks have been
 * collapsed, not a C translation unit. The patch text is left untouched; this
 * preamble only summarises what the visible hunks do.
 *
 * tests/checkasm/checkasm.h
 *   - adds an include of libavutil/cpu.h; the new macros call
 *     av_get_cpu_flags().
 *   - adds declare_func_emms(cpu_flags, ret, ...), which mirrors
 *     declare_func() but expands to declare_new_emms() instead of
 *     declare_new() before the func_type typedef.
 *   - declares checkasm_checked_call_emms() next to checkasm_checked_call().
 *   - in the ARCH_X86_64 and ARCH_X86_32 branches, declare_new_emms()
 *     initialises the checked_call pointer to checkasm_checked_call_emms when
 *     (cpu_flags & av_get_cpu_flags()) is non-zero, and to
 *     checkasm_checked_call otherwise.
 *   - in the non-x86 / non-yasm branch, declare_new_emms() expands to nothing,
 *     exactly like declare_new().
 *
 * tests/checkasm/h264pred.c, h264qpel.c, pixblockdsp.c, vp9dsp.c
 *   - replace declare_func() with declare_func_emms(). The flag argument is
 *     AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT everywhere except check_pred8x8l
 *     (AV_CPU_FLAG_MMXEXT only) and the two pixblockdsp macros
 *     (AV_CPU_FLAG_MMX only).
 *
 * tests/checkasm/x86/checkasm.asm
 *   - adds the message string "failed to issue emms".
 *   - moves the fail_func call sequence into a report_fail macro, one per
 *     architecture; it copies rax/rdx (eax/edx on x86-32) into two other
 *     registers around the call and copies them back afterwards.
 *   - wraps the body of checked_call in a check_call macro taking an optional
 *     name suffix and instantiates it twice at the end of the file, once bare
 *     and once with _emms.
 *   - inside the "%ifnid %1, _emms" branch the code executes fstenv [rsp],
 *     loads one byte from [rsp + 8], adds 1, and unless the result is zero
 *     reports error_message_emms and then executes emms. The other branch
 *     executes emms unconditionally.
 *   - NOTE(review): the guarded branch is presumably the one assembled for the
 *     bare instantiation; confirm against NASM's %ifid semantics.
 *   - NOTE(review): the byte at offset 8 of the fstenv image is presumably the
 *     low half of the x87 tag word, with 0xff meaning "all empty"; only that
 *     one byte is tested. Confirm against the CPU manual.
 *   - NOTE(review): the x86-32 variant addresses the stack as [rsp] while the
 *     neighbouring lines use esp; confirm the assembler include maps rsp to
 *     esp on 32-bit targets.
 *   - NOTE(review): the first "#include" in the checkasm.h context has no
 *     operand, which looks like an extraction artefact rather than part of
 *     the patch.
 */
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 73b7e429a4..e41ef67547 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -26,6 +26,7 @@ #include #include "config.h" #include "libavutil/avstring.h" +#include "libavutil/cpu.h" #include "libavutil/lfg.h" #include "libavutil/timer.h" @@ -58,6 +59,7 @@ static av_unused void *func_ref, *func_new; /* Declare the function prototype. The first argument is the return value, the remaining * arguments are the function parameters. Naming parameters is optional. */ #define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) +#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) /* Indicate that the current test has failed */ #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) @@ -69,8 +71,12 @@ static av_unused void *func_ref, *func_new; #define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__) #if ARCH_X86 && HAVE_YASM -/* Verifies that clobbered callee-saved registers are properly saved and restored */ +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and that either no MMX registers are touched or emms is issued */ void checkasm_checked_call(void *func, ...); +/* Verifies that clobbered callee-saved registers are properly saved and restored + * and issues emms for asm functions which are not required to do so */ +void checkasm_checked_call_emms(void *func, ...); #if ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. @@ -85,16 +91,24 @@ void checkasm_checked_call(void *func, ...); void checkasm_stack_clobber(uint64_t clobber, ...); #define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) 
\ + ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \ + ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ + (void *)checkasm_checked_call; #define CLOB (UINT64_C(0xdeadbeefdeadbeef)) #define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) #elif ARCH_X86_32 #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; +#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \ + ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ + (void *)checkasm_checked_call; #define call_new(...) checked_call(func_new, __VA_ARGS__) #endif #else #define declare_new(ret, ...) +#define declare_new_emms(cpu_flags, ret, ...) /* Call the function */ #define call_new(...) ((func_type *)func_new)(__VA_ARGS__) #endif diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c index ad1b02758d..4028405006 100644 --- a/tests/checkasm/h264pred.c +++ b/tests/checkasm/h264pred.c @@ -144,7 +144,7 @@ static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, if (chroma_format == 1) { uint8_t *topright = buf0 + 2*16; int pred_mode; - declare_func(void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 15; pred_mode++) { if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { @@ -163,7 +163,7 @@ static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, int codec, int chroma_format, int bit_depth) { int pred_mode; - declare_func(void, uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); for (pred_mode 
= 0; pred_mode < 11; pred_mode++) { if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", @@ -183,7 +183,7 @@ static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, { if (chroma_format == 1) { int pred_mode; - declare_func(void, uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 9; pred_mode++) { if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { @@ -203,7 +203,7 @@ static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, { if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { int pred_mode; - declare_func(void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); for (pred_mode = 0; pred_mode < 12; pred_mode++) { if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { diff --git a/tests/checkasm/h264qpel.c b/tests/checkasm/h264qpel.c index fb7a1db62d..ba069f1259 100644 --- a/tests/checkasm/h264qpel.c +++ b/tests/checkasm/h264qpel.c @@ -55,7 +55,7 @@ void checkasm_check_h264qpel(void) LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]); H264QpelContext h; int op, bit_depth, i, j; - declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); for (op = 0; op < 2; op++) { qpel_mc_func (*tab)[16] = op ? 
h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab; diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c index 70e7f7409b..66bfdb7db8 100644 --- a/tests/checkasm/pixblockdsp.c +++ b/tests/checkasm/pixblockdsp.c @@ -47,7 +47,7 @@ #define check_get_pixels(type) \ do { \ int i; \ - declare_func(void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \ + declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \ \ for (i = 0; i < BUF_UNITS; i++) { \ int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */ \ @@ -64,7 +64,7 @@ #define check_diff_pixels(type) \ do { \ int i; \ - declare_func(void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ + declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ \ for (i = 0; i < BUF_UNITS; i++) { \ int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */ \ diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c index c1e13764e2..931f7882b5 100644 --- a/tests/checkasm/vp9dsp.c +++ b/tests/checkasm/vp9dsp.c @@ -54,8 +54,8 @@ static void check_ipred(void) LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]); VP9DSPContext dsp; int tx, mode, bit_depth; - declare_func(void, uint8_t *dst, ptrdiff_t stride, - const uint8_t *left, const uint8_t *top); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top); static const char *const mode_names[N_INTRA_PRED_MODES] = { [VERT_PRED] = "vert", [HOR_PRED] = "hor", @@ -315,7 +315,7 @@ static void check_itxfm(void) LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]); LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]); LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]); - declare_func(void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, 
uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); VP9DSPContext dsp; int y, x, tx, txtp, bit_depth, sub; static const char *const txtp_types[N_TXFM_TYPES] = { @@ -553,8 +553,8 @@ static void check_mc(void) LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]); VP9DSPContext dsp; int op, hsize, bit_depth, filter, dx, dy; - declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, - const uint8_t *ref, ptrdiff_t ref_stride, + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my); static const char *const filter_names[4] = { "8tap_smooth", "8tap_regular", "8tap_sharp", "bilin" diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm index 20012f8ecb..41462f1887 100644 --- a/tests/checkasm/x86/checkasm.asm +++ b/tests/checkasm/x86/checkasm.asm @@ -26,6 +26,7 @@ SECTION_RODATA error_message: db "failed to preserve register", 0 +error_message_emms: db "failed to issue emms", 0 %if ARCH_X86_64 ; just random numbers to reduce the chance of incidental match @@ -83,11 +84,22 @@ cglobal stack_clobber, 1,2 DECLARE_REG_TMP 7 %endif +%macro report_fail 1 + mov r9, rax + mov r10, rdx + lea r0, [%1] + xor eax, eax + call fail_func + mov rdx, r10 + mov rax, r9 +%endmacro + ;----------------------------------------------------------------------------- ; void checkasm_checked_call(void *func, ...) ;----------------------------------------------------------------------------- INIT_XMM -cglobal checked_call, 2,15,16,max_args*8+8 +%macro check_call 0-1 +cglobal checked_call%1, 2,15,16,max_args*8+8 mov t0, r0 ; All arguments have been pushed on the stack instead of registers in order to @@ -154,16 +166,22 @@ cglobal checked_call, 2,15,16,max_args*8+8 ; Call fail_func() with a descriptive message to mark it as a failure ; if the called function didn't preserve all callee-saved registers. ; Save the return value located in rdx:rax first to prevent clobbering. 
- jz .ok - mov r9, rax - mov r10, rdx - lea r0, [error_message] - xor eax, eax - call fail_func - mov rdx, r10 - mov rax, r9 -.ok: + jz .clobber_ok + report_fail error_message +.clobber_ok: +%ifnid %1, _emms + fstenv [rsp] + mov r9h, [rsp + 8] + add r9h, 1 + jz .emms_ok + report_fail error_message_emms + emms +.emms_ok: +%else + emms +%endif RET +%endmacro %else @@ -173,10 +191,21 @@ cglobal checked_call, 2,15,16,max_args*8+8 %define n5 dword 0xb78d0d1d %define n6 dword 0x33627ba7 +%macro report_fail 1 + mov r3, eax + mov r4, edx + lea r0, [%1] + mov [esp], r0 + call fail_func + mov edx, r4 + mov eax, r3 +%endmacro + +%macro check_call 0-1 ;----------------------------------------------------------------------------- ; void checkasm_checked_call(void *func, ...) ;----------------------------------------------------------------------------- -cglobal checked_call, 1,7 +cglobal checked_call%1, 1,7 mov r3, n3 mov r4, n4 mov r5, n5 @@ -192,16 +221,25 @@ cglobal checked_call, 1,7 or r3, r4 or r5, r6 or r3, r5 - jz .ok - mov r3, eax - mov r4, edx - lea r0, [error_message] - mov [esp], r0 - call fail_func - mov edx, r4 - mov eax, r3 -.ok: + jz .clobber_ok + report_fail error_message +.clobber_ok: +%ifnid %1, _emms + fstenv [rsp] + mov r3h, [rsp + 8] + add r3h, 1 + jz .emms_ok + report_fail error_message_emms + emms +.emms_ok: +%else + emms +%endif add esp, max_args*4 REP_RET +%endmacro %endif ; ARCH_X86_64 + +check_call +check_call _emms