diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index de048f863d..438863042f 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2018 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Henrik Gramner <henrik@gramner.com>
@@ -892,6 +892,36 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %undef %1%2
 %endmacro
 
+%macro DEFINE_MMREGS 1 ; mmtype
+    %assign %%prev_mmregs 0
+    %ifdef num_mmregs
+        %assign %%prev_mmregs num_mmregs
+    %endif
+
+    %assign num_mmregs 8
+    %if ARCH_X86_64 && mmsize >= 16
+        %assign num_mmregs 16
+        %if cpuflag(avx512) || mmsize == 64
+            %assign num_mmregs 32
+        %endif
+    %endif
+
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, %1 %+ %%i
+        CAT_XDEFINE nn%1, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %if %%prev_mmregs > num_mmregs
+        %rep %%prev_mmregs - num_mmregs
+            CAT_UNDEF m, %%i
+            CAT_UNDEF nn %+ mmtype, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+    %xdefine mmtype %1
+%endmacro
+
 ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
 %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
     %if ARCH_X86_64 && cpuflag(avx512)
@@ -908,23 +938,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_MMX %1
     %define mmsize 8
-    %define num_mmregs 8
     %define mova movq
     %define movu movq
     %define movh movd
     %define movnta movntq
-    %assign %%i 0
-    %rep 8
-        CAT_XDEFINE m, %%i, mm %+ %%i
-        CAT_XDEFINE nnmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
-    %rep 24
-        CAT_UNDEF m, %%i
-        CAT_UNDEF nnmm, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS mm
 %endmacro
 
 %macro INIT_XMM 0-1+
@@ -936,22 +955,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define movh movq
     %define movnta movntdq
     INIT_CPUFLAGS %1
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 16
-        %if cpuflag(avx512)
-            %define num_mmregs 32
-        %endif
-    %endif
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, xmm %+ %%i
-        CAT_XDEFINE nnxmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
+    DEFINE_MMREGS xmm
     %if WIN64
-        ; Swap callee-saved registers with volatile registers
-        AVX512_MM_PERMUTATION 6
+        AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
     %endif
 %endmacro
 
@@ -964,19 +970,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %undef movh
     %define movnta movntdq
     INIT_CPUFLAGS %1
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 16
-        %if cpuflag(avx512)
-            %define num_mmregs 32
-        %endif
-    %endif
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, ymm %+ %%i
-        CAT_XDEFINE nnymm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
+    DEFINE_MMREGS ymm
     AVX512_MM_PERMUTATION
 %endmacro
 
@@ -984,21 +978,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_ZMM %1
     %define mmsize 64
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, zmm %+ %%i
-        CAT_XDEFINE nnzmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS zmm
     AVX512_MM_PERMUTATION
 %endmacro
 
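For readers unfamiliar with the x86inc macro machinery, here is a minimal standalone sketch of the define-then-shrink pattern that DEFINE_MMREGS centralizes. This is illustrative only, not part of the patch: SET_REGS and nregs are hypothetical stand-ins for DEFINE_MMREGS and num_mmregs (the nn* reverse mappings and the mmsize/cpuflag logic are omitted), while CAT_XDEFINE and CAT_UNDEF are the real helpers from x86inc.asm.

; Standalone sketch (hypothetical names: SET_REGS, nregs) of the
; define-then-shrink pattern used by DEFINE_MMREGS.
; Assembles with: nasm -f elf64 sketch.asm
BITS 64

%macro CAT_XDEFINE 3            ; helpers as defined in x86inc.asm
    %xdefine %1%2 %3
%endmacro
%macro CAT_UNDEF 2
    %undef %1%2
%endmacro

%macro SET_REGS 2               ; register-name prefix, register count
    %assign %%prev 0
    %ifdef nregs                ; remember how many aliases already exist
        %assign %%prev nregs
    %endif
    %assign nregs %2
    %assign %%i 0
    %rep nregs                  ; m0 -> <prefix>0, m1 -> <prefix>1, ...
        CAT_XDEFINE m, %%i, %1 %+ %%i
        %assign %%i %%i+1
    %endrep
    %if %%prev > nregs          ; shrink: drop aliases beyond the new count
        %rep %%prev - nregs
            CAT_UNDEF m, %%i
            %assign %%i %%i+1
        %endrep
    %endif
%endmacro

SECTION .text
sketch:
SET_REGS xmm, 16
    movdqa m0, m15              ; m15 == xmm15: defined
SET_REGS mm, 8                  ; m8..m15 are now undefined again
    movq   m0, m7               ; m7 == mm7: still defined
;   movq   m0, m8               ; would now be an assembly error, not xmm8
    ret

The shrink path is the part each INIT_* macro previously had to hand-roll (e.g. INIT_MMX's fixed %rep 24 cleanup loop, which hardcoded a 32-register maximum); DEFINE_MMREGS instead remembers the previous register count and undefines exactly the excess aliases, using the previous mmtype for the stale nn* reverse mappings.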