@ -353,14 +353,18 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
% if stack_size < 0
% assign stack_size -stack_size
% endif
% if mmsize != 8
% assign xmm_regs_used %2
% assign stack_size_padded stack_size
% if WIN64
% assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
% if mmsize != 8
% assign xmm_regs_used %2
% if xmm_regs_used > 8
% assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
% endif
% endif
% endif
% if mmsize <= 16 && HAVE_ALIGNED_STACK
% assign stack_size_padded stack_size + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
% if xmm_regs_used > 6
% assign stack_size_padded stack_size_padded + (xmm_regs_used - 6) * 16
% endif
% assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
SUB rsp , stack_size_padded
% else
% assign %%reg_num (regs_used - 1)
@ -370,14 +374,6 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
; stack in a single instruction (i.e. mov rsp, rstk or mov
; rsp, [rsp+stack_size_padded])
mov rstk , rsp
% assign stack_size_padded stack_size
% if xmm_regs_used > 6
% assign stack_size_padded stack_size_padded + (xmm_regs_used - 6) * 16
% if mmsize == 32 && xmm_regs_used & 1
; re-align to 32 bytes
% assign stack_size_padded (stack_size_padded + 16)
% endif
% endif
% if %1 < 0 ; need to store rsp on stack
sub rsp , gprsize + stack_size_padded
and rsp , ~ ( %% stack_alignment - 1 )
@ -389,9 +385,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
% xdefine rstkm rstk
% endif
% endif
% if xmm_regs_used > 6
WIN64_PUSH_XMM
% endif
WIN64_PUSH_XMM
% endif
% endif
% endmacro
@ -452,40 +446,55 @@ DECLARE_REG 14, R15, 120
% endmacro
; WIN64_PUSH_XMM (no arguments): emits movaps stores that copy xmm6 and above into stack
; memory, driven by the assembly-time counter xmm_regs_used.
; NOTE(review): this chunk carries diff hunk markers, and the descending loop right below and
; the shadow-space sequence after it both store xmm6 and up - they look like the before/after
; forms of a single change. Confirm which lines are live before relying on either.
% macro WIN64_PUSH_XMM 0
; Walk the register index down from xmm_regs_used, one store per iteration.
% assign %%i xmm_regs_used
; NOTE(review): the repeat count is xmm_regs_used-6, which is not positive when
; xmm_regs_used <= 6; no guard is visible inside this macro - confirm callers guarantee it.
% rep (xmm_regs_used-6)
% assign %%i %%i-1
; Slot address: 16 bytes per register above stack_size, plus 8 when bit 3 of stack_offset is clear.
movaps [ rsp + ( %% i - 6 ) * 16 + stack_size + ( ~ stack_offset & 8 )], xmm %+ %% i
% endrep
; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
; Both stores below are addressed from rstk + stack_offset, at +8 (xmm6) and +24 (xmm7).
% if xmm_regs_used > 6
movaps [ rstk + stack_offset + 8 ], xmm6
% endif
% if xmm_regs_used > 7
movaps [ rstk + stack_offset + 24 ], xmm7
% endif
; xmm8 and above: ascending loop starting at index 8, one 16-byte slot per register,
; placed at rsp + stack_size + 32 and upwards.
% if xmm_regs_used > 8
% assign %%i 8
% rep xmm_regs_used-8
movaps [ rsp + ( %% i - 8 ) * 16 + stack_size + 32 ], xmm %+ %% i
% assign %%i %%i+1
% endrep
% endif
% endmacro
; WIN64_SPILL_XMM count: records count in xmm_regs_used, lowers rsp to make room, and
; invokes WIN64_PUSH_XMM to store the registers.
; NOTE(review): this chunk carries diff hunk markers; two different rsp adjustments and two
; WIN64_PUSH_XMM invocations appear below, and two conditionals are opened while only one
; closing directive is visible before the end of the macro. This looks like before/after
; lines of one change merged together - confirm against the real file.
% macro WIN64_SPILL_XMM 1
% assign xmm_regs_used %1
; Assembly-time check that no more than 16 registers are requested.
ASSERT xmm_regs_used < = 16
; First form: more than 6 registers -> reserve 16 bytes per register beyond the sixth,
; plus 16 extra bytes, then store.
% if xmm_regs_used > 6
SUB rsp , ( xmm_regs_used - 6 ) * 16 + 16
WIN64_PUSH_XMM
; Second form: more than 8 registers -> reserve 16 bytes per register beyond the eighth,
; plus 8 when bit 3 of stack_offset is clear, plus 32; the total is kept in stack_size_padded.
% if xmm_regs_used > 8
% assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
SUB rsp , stack_size_padded
% endif
WIN64_PUSH_XMM
% endmacro
; WIN64_RESTORE_XMM_INTERNAL base: emits movaps loads that reload xmm6 and above, and
; releases stack space. The single argument is used as the base register of every load
; below and as the destination of the add instructions.
; NOTE(review): this chunk carries diff hunk markers; two repeat directives share a single
; end-of-repeat, two load forms (offsets based on index-6 and index-8) sit in the same loop
; body, and the conditionals opened at the top are not all closed. This looks like
; before/after lines of one change merged together - confirm against the real file.
% macro WIN64_RESTORE_XMM_INTERNAL 1
% if xmm_regs_used > 6
; pad_size: number of bytes added to the base register before xmm6/xmm7 are reloaded;
; it stays 0 unless the add branch further down runs.
% assign %%pad_size 0
% if xmm_regs_used > 8
; Walk the register index down from xmm_regs_used, one load per iteration.
% assign %%i xmm_regs_used
% rep (xmm_regs_used-6)
% rep xmm_regs_used-8
% assign %%i %%i-1
; Load form using (index-6)*16 above stack_size, plus 8 when bit 3 of stack_offset is clear.
movaps xmm %+ %% i , [ % 1 + ( %% i - 6 ) * 16 + stack_size + ( ~ stack_offset & 8 ) ]
; Load form using (index-8)*16 above stack_size + 32.
movaps xmm %+ %% i , [ % 1 + ( %% i - 8 ) * 16 + stack_size + 32 ]
% endrep
; When stack_size_padded is 0, add back 16 bytes per register beyond the sixth plus 16.
% if stack_size_padded == 0
add % 1 , ( xmm_regs_used - 6 ) * 16 + 16
% endif
% endif
; When stack_size_padded is non-zero: either reload rsp from rstkm (stack_size > 0 and
; mmsize == 32 or HAVE_ALIGNED_STACK == 0), or add stack_size_padded to the base register
; and remember that amount in pad_size.
% if stack_size_padded > 0
% if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
mov rsp , rstkm
% else
add % 1 , stack_size_padded
% assign %%pad_size stack_size_padded
% endif
% endif
; xmm7 and xmm6 are reloaded from base + stack_offset + 24 / + 8; pad_size is subtracted
; so the address accounts for the add performed above.
% if xmm_regs_used > 7
movaps xmm7 , [ % 1 + stack_offset - %% pad_size + 24 ]
% endif
% if xmm_regs_used > 6
movaps xmm6 , [ % 1 + stack_offset - %% pad_size + 8 ]
% endif
% endmacro
% macro WIN64_RESTORE_XMM 1
@ -702,12 +711,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
% endif
align function_align
% 2:
RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
% xdefine rstk rsp
% assign stack_offset 0
% assign stack_size 0
% assign stack_size_padded 0
% assign xmm_regs_used 0
RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
% xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
% assign stack_offset 0 ; stack pointer offset relative to the return address
% assign stack_size 0 ; amount of stack space that can be freely used inside a function
% assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
% assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64
% ifnidn %3, ""
PROLOGUE % 3
% endif