@ -1,11 +1,12 @@
;*****************************************************************************
;* x86inc.asm: x264asm abstraction layer
;*****************************************************************************
;* Copyright (C) 2005-2011 x264 project
;* Copyright (C) 2005-2012 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Anton Mitrofanov <BugMaster@narod.ru>
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* Henrik Gramner <hengar-6@student.ltu.se>
;*
;* Permission to use, copy, modify, and/or distribute this software for any
;* purpose with or without fee is hereby granted, provided that the above
@ -95,6 +96,9 @@
default rel
% endif
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
CPU amdnop
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
@ -128,18 +132,20 @@
; rNm is the original location of arg N (a register or on the stack), dword
; rNmp is native size
; DECLARE_REG id, qword_reg, dword_reg, word_reg, byte_reg [, stack_offset_of_arg]
;
; Defines the aliases for abstract register <id>:
;   r<id>q / r<id>d / r<id>w / r<id>b  - the 64/32/16/8-bit names passed in
;   r<id>                              - the native-size name (parameter 2)
;   r<id>m                             - where argument <id> originally lives, dword
;   r<id>mp                            - the same location at native size
;
; With 5 parameters the argument is treated as arriving in the register itself.
; With 6 parameters the sixth is a byte offset that is added to
; rsp/esp + stack_offset to address the argument in memory.
%macro DECLARE_REG 5-6
    %define r%1q %2
    %define r%1d %3
    %define r%1w %4
    %define r%1b %5
    %if %0 == 5
        ; register argument: original location is the register
        %define r%1m  %3
        %define r%1mp %2
    %elif ARCH_X86_64 ; memory
        %define r%1m [rsp + stack_offset + %6]
        %define r%1mp qword r %+ %1m
    %else
        %define r%1m [esp + stack_offset + %6]
        %define r%1mp dword r %+ %1m
    %endif
    %define r%1  %2
%endmacro
@ -187,7 +193,7 @@ DECLARE_REG_SIZE bp, bpl
% endrep
% endmacro
; One invocation covering temporaries 0..14; it is a superset of the
; earlier 0..9 list, so that call is not repeated.
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
% if ARCH_X86_64
% define gprsize 8
@ -205,6 +211,33 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
% assign stack_offset stack_offset-gprsize
% endmacro
; PUSH_IF_USED id [, id ...]
; For each listed register id, in the order given, emit `PUSH r<id>` when
; id < regs_used. Ids at or above regs_used emit nothing.
; PUSH (upper case) is a macro defined elsewhere in this file
; (presumably it also keeps stack_offset in sync - confirm).
%macro PUSH_IF_USED 1-*
    %rep %0
        %if %1 < regs_used
            PUSH r%1
        %endif
        %rotate 1 ; advance to the next id in the list
    %endrep
%endmacro
; POP_IF_USED id [, id ...]
; For each listed register id, in the order given, emit `pop r<id>` when
; id < regs_used. Callers list the ids in the reverse of the order used
; with PUSH_IF_USED so the pops mirror the pushes.
; This uses the plain `pop` instruction, not a wrapper macro.
%macro POP_IF_USED 1-*
    %rep %0
        %if %1 < regs_used
            pop r%1
        %endif
        %rotate 1 ; advance to the next id in the list
    %endrep
%endmacro
; LOAD_IF_USED id [, id ...]
; For each listed register id, emit `mov r<id>, r<id>mp` when id < num_args,
; i.e. load argument <id> from its original native-size location into its
; register. Ids at or above num_args emit nothing.
%macro LOAD_IF_USED 1-*
    %rep %0
        %if %1 < num_args
            mov r%1, r %+ %1 %+ mp ; token-paste builds the r<id>mp alias
        %endif
        %rotate 1 ; advance to the next id in the list
    %endrep
%endmacro
% macro SUB 2
sub % 1 , % 2
% ifidn %1, rsp
@ -272,39 +305,34 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
% if WIN64 ; Windows x64 ; =================================================
; Win64 register map.
; r0-r3 use the 5-parameter form: the argument arrives in the register.
; r4-r14 use the 6-parameter form: the last value is the byte offset of
; argument N, which DECLARE_REG adds to rsp + stack_offset.
; r7-r14 are the registers that PROLOGUE saves via PUSH_IF_USED and RET
; restores via POP_IF_USED.
DECLARE_REG 0,  rcx, ecx,  cx,   cl
DECLARE_REG 1,  rdx, edx,  dx,   dl
DECLARE_REG 2,  r8,  r8d,  r8w,  r8b
DECLARE_REG 3,  r9,  r9d,  r9w,  r9b
DECLARE_REG 4,  r10, r10d, r10w, r10b, 40
DECLARE_REG 5,  r11, r11d, r11w, r11b, 48
DECLARE_REG 6,  rax, eax,  ax,   al,   56
DECLARE_REG 7,  rdi, edi,  di,   dil,  64
DECLARE_REG 8,  rsi, esi,  si,   sil,  72
DECLARE_REG 9,  rbx, ebx,  bx,   bl,   80
DECLARE_REG 10, rbp, ebp,  bp,   bpl,  88
DECLARE_REG 11, r12, r12d, r12w, r12b, 96
DECLARE_REG 12, r13, r13d, r13w, r13b, 104
DECLARE_REG 13, r14, r14d, r14w, r14b, 112
DECLARE_REG 14, r15, r15d, r15w, r15b, 120
; PROLOGUE #args, #regs [, #xmm_regs [, arg_names...]]   (Win64)
;
; Records num_args and regs_used, checks that regs_used covers all arguments
; and does not exceed 15, saves r7..r14 when they are in use, spills XMM
; registers via WIN64_SPILL_XMM (skipped when mmsize == 8), loads arguments
; 4 and up from their original locations, then names the arguments with
; DEFINE_ARGS. #xmm_regs defaults to 0.
%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
    %assign num_args %1
    %assign regs_used %2
    ASSERT regs_used >= num_args
    ASSERT regs_used <= 15
    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
    %if mmsize == 8
        %assign xmm_regs_used 0
    %else
        WIN64_SPILL_XMM %3
    %endif
    ; arguments 0-3 are already in registers; only 4+ need loading
    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
    DEFINE_ARGS %4
%endmacro
@ -312,12 +340,11 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
% assign xmm_regs_used %1
ASSERT xmm_regs_used < = 16
% if xmm_regs_used > 6
sub rsp , ( xmm_regs_used - 6 ) * 16 + 16
% assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
SUB rsp , ( xmm_regs_used - 6 ) * 16 + 16
% assign %%i xmm_regs_used
% rep (xmm_regs_used-6)
% assign %%i %%i-1
movdqa [ rsp + ( %% i - 6 ) * 16 + 8 ], xmm %+ %% i
movdqa [ rsp + ( %% i - 6 ) * 16 + ( ~ stack_offset & 8 ) ], xmm %+ %% i
% endrep
% endif
% endmacro
@ -327,7 +354,7 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
% assign %%i xmm_regs_used
% rep (xmm_regs_used-6)
% assign %%i %%i-1
movdqa xmm %+ %% i , [ % 1 + ( %% i - 6 ) * 16 + 8 ]
movdqa xmm %+ %% i , [ % 1 + ( %% i - 6 ) * 16 + ( ~ stack_offset & 8 ) ]
% endrep
add % 1 , ( xmm_regs_used - 6 ) * 16 + 16
% endif
@ -341,15 +368,12 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
; RET (Win64): restore spilled XMM registers relative to rsp, pop r14..r7
; (whichever are in use, reverse of the PROLOGUE push order), then return.
%macro RET 0
    WIN64_RESTORE_XMM_INTERNAL rsp
    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
    ret
%endmacro
% macro REP_RET 0
% if regs_used > 4 || xmm_regs_used > 6
% if regs_used > 7 || xmm_regs_used > 6
RET
% else
rep ret
@ -358,92 +382,80 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
% elif ARCH_X86_64 ; *nix x64 ; =============================================
; *nix x64 register map.
; r0-r5 use the 5-parameter form: the argument arrives in the register.
; r6-r14 use the 6-parameter form: the last value is the byte offset of
; argument N, which DECLARE_REG adds to rsp + stack_offset.
; r9-r14 are the registers that PROLOGUE saves via PUSH_IF_USED and RET
; restores via POP_IF_USED.
DECLARE_REG 0,  rdi, edi,  di,   dil
DECLARE_REG 1,  rsi, esi,  si,   sil
DECLARE_REG 2,  rdx, edx,  dx,   dl
DECLARE_REG 3,  rcx, ecx,  cx,   cl
DECLARE_REG 4,  r8,  r8d,  r8w,  r8b
DECLARE_REG 5,  r9,  r9d,  r9w,  r9b
DECLARE_REG 6,  rax, eax,  ax,   al,   8
DECLARE_REG 7,  r10, r10d, r10w, r10b, 16
DECLARE_REG 8,  r11, r11d, r11w, r11b, 24
DECLARE_REG 9,  rbx, ebx,  bx,   bl,   32
DECLARE_REG 10, rbp, ebp,  bp,   bpl,  40
DECLARE_REG 11, r12, r12d, r12w, r12b, 48
DECLARE_REG 12, r13, r13d, r13w, r13b, 56
DECLARE_REG 13, r14, r14d, r14w, r14b, 64
DECLARE_REG 14, r15, r15d, r15w, r15b, 72
; PROLOGUE #args, #regs [, #xmm_regs [, arg_names...]]   (*nix x64)
;
; Records num_args and regs_used, checks that regs_used covers all arguments
; and does not exceed 15, saves r9..r14 when they are in use, loads arguments
; 6 and up from their original locations, then names the arguments with
; DEFINE_ARGS. The #xmm_regs parameter is accepted but not referenced here.
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
    %assign num_args %1
    %assign regs_used %2
    ASSERT regs_used >= num_args
    ASSERT regs_used <= 15
    PUSH_IF_USED 9, 10, 11, 12, 13, 14
    ; arguments 0-5 are already in registers; only 6+ need loading
    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
    DEFINE_ARGS %4
%endmacro
; RET (*nix x64): pop r14..r9 (whichever are in use, reverse of the
; PROLOGUE push order), then return.
%macro RET 0
    POP_IF_USED 14, 13, 12, 11, 10, 9
    ret
%endmacro
; REP_RET (*nix x64): when regs_used > 9 at least one register was pushed by
; PROLOGUE, so expand the full RET; otherwise nothing needs popping and a
; bare `rep ret` is emitted.
%macro REP_RET 0
    %if regs_used > 9
        RET
    %else
        rep ret
    %endif
%endmacro
% else ; X86_32 ; ==============================================================
; x86_32 register map.
; Every argument uses the 6-parameter form: the last value is the byte
; offset of argument N, which DECLARE_REG adds to esp + stack_offset.
; The qword and dword names are the same 32-bit register. r4-r6 pass the
; placeholder token `null` as their byte alias.
DECLARE_REG 0, eax, eax, ax, al,   4
DECLARE_REG 1, ecx, ecx, cx, cl,   8
DECLARE_REG 2, edx, edx, dx, dl,   12
DECLARE_REG 3, ebx, ebx, bx, bl,   16
DECLARE_REG 4, esi, esi, si, null, 20
DECLARE_REG 5, edi, edi, di, null, 24
DECLARE_REG 6, ebp, ebp, bp, null, 28
; lets shared code write rsp on 32-bit builds
%define rsp esp
; DECLARE_ARG id [, id ...]   (x86_32)
; For each listed argument id, define the memory aliases only (no register
; is assigned):
;   r<id>m  = [esp + stack_offset + 4*id + 4]
;   r<id>mp = dword r<id>m
; The formula gives the same offsets as the explicit 4, 8, ... 28 passed to
; DECLARE_REG for ids 0-6.
%macro DECLARE_ARG 1-*
    %rep %0
        %define r%1m [esp + stack_offset + 4*%1 + 4]
        %define r%1mp dword r%1m
        %rotate 1 ; advance to the next id in the list
    %endrep
%endmacro

; arguments beyond the seven register-backed ones
DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
; PROLOGUE #args, #regs [, #xmm_regs [, arg_names...]]   (x86_32)
;
; Records num_args and regs_used, clamping regs_used to 7 (only r0-r6 are
; declared as registers on this target). It then saves r3..r6 when in use,
; loads arguments 0..6 from their stack locations, and names the arguments
; with DEFINE_ARGS. The #xmm_regs parameter is accepted but not referenced.
;
; NOTE(review): the clamp runs before the assertion, so a #args value above 7
; would presumably trip `ASSERT regs_used >= num_args` - confirm intended.
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
    %assign num_args %1
    %assign regs_used %2
    %if regs_used > 7
        %assign regs_used 7
    %endif
    ASSERT regs_used >= num_args
    PUSH_IF_USED 3, 4, 5, 6
    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
    DEFINE_ARGS %4
%endmacro
; RET (x86_32): pop r6..r3 (whichever are in use, reverse of the PROLOGUE
; push order), then return. Each register is popped at most once.
%macro RET 0
    POP_IF_USED 6, 5, 4, 3
    ret
%endmacro
@ -464,8 +476,6 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
% endmacro
% endif
;=============================================================================
; arch-independent part
;=============================================================================