@ -2200,11 +2200,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
; Set up an aligned scratch area on the stack:
;   1. keep the incoming rsp in stack_reg,
;   2. clear the low bits of rsp so it is a multiple of mmsize
;      (assumes mmsize is a power of two),
;   3. reserve 7 or 8 slots of mmsize bytes each below that.
mov stack_reg , rsp ; backup stack pointer
and rsp , ~ ( mmsize - 1 ) ; align stack
; NOTE(review): %2 is the direction argument (it forms the "v"/"h" part of the
; cglobal name, and every MBEDGE_LOOPFILTER instantiation passes v or h as its
; second argument), while "sse2" is a value of %1. As written this comparison
; looks like it can never match, so the 8-slot branch would always be taken.
; Confirm whether %1 (or a test on mmsize) was intended. This test must stay
; in step with the identical one that selects the lim_sign slot.
% ifidn %2, sse2
; 7 slots: lim_sign has no slot of its own in this layout.
sub rsp , mmsize * 7
% else
sub rsp , mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
; [3]=hev() result
; [4]=filter tmp result
; [5]/[6] = p2/q2 backup
; [7]=lim_res sign result
% endif
; Symbolic names for the first two slots of the scratch area.
% define flim_E [rsp]
% define flim_I [rsp+mmsize]
@ -2215,7 +2219,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
; Stack-slot aliases (slot n lives at rsp + mmsize*n).
% define q0backup [rsp+mmsize*4]
% define p2backup [rsp+mmsize*5]
% define q2backup [rsp+mmsize*6]
; Pick where lim_sign is stored. The first branch reuses [rsp], which is the
; same address flim_E is defined as in this listing; the second uses slot 7,
; which only exists when 8 slots were reserved by the "sub rsp" above.
; NOTE(review): %2 carries the direction (v/h) in the instantiations shown in
; this listing, so comparing it with "sse2" looks like it never matches and
; slot 7 would always be chosen. Confirm the intended parameter, and keep this
; test identical to the one guarding the stack reservation.
% ifidn %2, sse2
% define lim_sign [rsp]
% else
% define lim_sign [rsp+mmsize*7]
% endif
; Store m0 and m1 into the E and I slots.
mova flim_E , m0
mova flim_I , m1
@ -2232,7 +2240,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
; Register-backed variants of the scratch names: here the backups map to SIMD
; registers instead of stack slots.
% define q0backup m8
% define p2backup m13
% define q2backup m14
; NOTE(review): lim_sign is defined twice in a row. This looks like the
; removed/added pair of a diff whose +/- markers were lost (m15 before,
; m9 after). If both lines are assembled, the later definition (m9) is the
; one in effect.
% define lim_sign m15
% define lim_sign m9
; splat function arguments
SPLATB_REG flim_E , E_reg , m7 ; E
@ -2638,8 +2646,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
; Multiply the words in m1 by the pw_9 constant (presumably a vector of 9s --
; confirm against its definition), then add m7 to both m6 and m1.
pmullw m1 , [ pw_9 ]
paddw m6 , m7
paddw m1 , m7
; Bring lim_sign into m7: when the register that holds it exists, rename it
; with SWAP (no data move); otherwise load it from the lim_sign location.
; NOTE(review): two %ifdef openers share the single %endif visible here. This
; looks like the before (m15) / after (m9) pair of a diff with its +/- markers
; stripped; only one of the two ifdef/SWAP pairs is expected to remain, and it
; should name the same register that lim_sign is defined as.
% ifdef m15
SWAP 7 , 15
% ifdef m9
SWAP 7 , 9
% else
mova m7 , lim_sign
% endif
@ -2749,29 +2757,29 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
; Instantiations of MBEDGE_LOOPFILTER for the XMM code paths.
; Arguments as used by the cglobal line of the macro: %1 = CPU suffix,
; %2 = direction (v/h), %3 and %5 are forwarded to cglobal (presumably the
; general-purpose and SIMD register counts -- confirm against x86inc), and
; the fourth argument is 16 or 8.
; NOTE(review): every instantiation appears twice, differing only in the last
; argument (16 vs 15). This looks like the before/after pair of a diff whose
; +/- markers were lost; assembling both would emit each function twice.
; NOTE(review): the CPU names appear split by extraction ("ss e2", "ss se3",
; "ss e4"); presumably sse2 / ssse3 / sse4 -- confirm against the original.
INIT_XMM
% define SPLATB_REG SPLATB_REG_SSE2
% define WRITE_8W WRITE_8W_SSE2
MBEDGE_LOOPFILTER ss e2 , v , 5 , 16 , 16
MBEDGE_LOOPFILTER ss e2 , v , 5 , 16 , 15
; The h variant is given 5 in the third argument when m8 is defined and
; 6 otherwise.
% ifdef m8
MBEDGE_LOOPFILTER ss e2 , h , 5 , 16 , 16
MBEDGE_LOOPFILTER ss e2 , h , 5 , 16 , 15
% else
MBEDGE_LOOPFILTER ss e2 , h , 6 , 16 , 16
MBEDGE_LOOPFILTER ss e2 , h , 6 , 16 , 15
% endif
; Variants with 8 as the fourth argument (before pair, then after pair).
MBEDGE_LOOPFILTER ss e2 , v , 6 , 8 , 16
MBEDGE_LOOPFILTER ss e2 , h , 6 , 8 , 16
MBEDGE_LOOPFILTER ss e2 , v , 6 , 8 , 15
MBEDGE_LOOPFILTER ss e2 , h , 6 , 8 , 15
; Same set again with the SSSE3 splat helper selected.
% define SPLATB_REG SPLATB_REG_SSSE3
MBEDGE_LOOPFILTER ss se3 , v , 5 , 16 , 16
MBEDGE_LOOPFILTER ss se3 , v , 5 , 16 , 15
% ifdef m8
MBEDGE_LOOPFILTER ss se3 , h , 5 , 16 , 16
MBEDGE_LOOPFILTER ss se3 , h , 5 , 16 , 15
% else
MBEDGE_LOOPFILTER ss se3 , h , 6 , 16 , 16
MBEDGE_LOOPFILTER ss se3 , h , 6 , 16 , 15
% endif
MBEDGE_LOOPFILTER ss se3 , v , 6 , 8 , 16
MBEDGE_LOOPFILTER ss se3 , h , 6 , 8 , 16
MBEDGE_LOOPFILTER ss se3 , v , 6 , 8 , 15
MBEDGE_LOOPFILTER ss se3 , h , 6 , 8 , 15
; With the SSE4 WRITE_8W helper selected, only h variants are instantiated.
% define WRITE_8W WRITE_8W_SSE4
% ifdef m8
MBEDGE_LOOPFILTER ss e4 , h , 5 , 16 , 16
MBEDGE_LOOPFILTER ss e4 , h , 5 , 16 , 15
% else
MBEDGE_LOOPFILTER ss e4 , h , 6 , 16 , 16
MBEDGE_LOOPFILTER ss e4 , h , 6 , 16 , 15
% endif
MBEDGE_LOOPFILTER ss e4 , h , 6 , 8 , 16
MBEDGE_LOOPFILTER ss e4 , h , 6 , 8 , 15