diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index a67c5bcc79..b3070825f1 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -2200,11 +2200,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
     ; align stack
     mov       stack_reg, rsp         ; backup stack pointer
     and             rsp, ~(mmsize-1) ; align stack
+%ifidn %1, sse2
+    sub             rsp, mmsize * 7  ; sse2: lim_sign shares slot [0] with E
+%else
     sub             rsp, mmsize * 8  ; stack layout: [0]=E, [1]=I, [2]=hev_thr
                                      ;               [3]=hev() result
                                      ;               [4]=filter tmp result
                                      ;               [5]/[6] = p2/q2 backup
                                      ;               [7]=lim_res sign result
+%endif
 
 %define flim_E   [rsp]
 %define flim_I   [rsp+mmsize]
@@ -2215,7 +2219,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
 %define q0backup [rsp+mmsize*4]
 %define p2backup [rsp+mmsize*5]
 %define q2backup [rsp+mmsize*6]
+%ifidn %1, sse2
+%define lim_sign [rsp]
+%else
 %define lim_sign [rsp+mmsize*7]
+%endif
 
     mova         flim_E, m0
     mova         flim_I, m1
@@ -2232,7 +2240,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
 %define q0backup m8
 %define p2backup m13
 %define q2backup m14
-%define lim_sign m15
+%define lim_sign m9
 
     ; splat function arguments
     SPLATB_REG   flim_E, E_reg, m7   ; E
@@ -2638,8 +2646,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
     pmullw           m1, [pw_9]
     paddw            m6, m7
     paddw            m1, m7
-%ifdef m15
-    SWAP              7, 15
+%ifdef m9
+    SWAP              7, 9
 %else
     mova             m7, lim_sign
 %endif
@@ -2749,29 +2757,29 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
 INIT_XMM
 %define SPLATB_REG SPLATB_REG_SSE2
 %define WRITE_8W   WRITE_8W_SSE2
-MBEDGE_LOOPFILTER sse2,   v, 5, 16, 16
+MBEDGE_LOOPFILTER sse2,   v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER sse2,   h, 5, 16, 16
+MBEDGE_LOOPFILTER sse2,   h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse2,   h, 6, 16, 16
+MBEDGE_LOOPFILTER sse2,   h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse2,   v, 6,  8, 16
-MBEDGE_LOOPFILTER sse2,   h, 6,  8, 16
+MBEDGE_LOOPFILTER sse2,   v, 6,  8, 15
+MBEDGE_LOOPFILTER sse2,   h, 6,  8, 15
 
 %define SPLATB_REG SPLATB_REG_SSSE3
-MBEDGE_LOOPFILTER ssse3,  v, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3,  v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER ssse3,  h, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3,  h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER ssse3,  h, 6, 16, 16
+MBEDGE_LOOPFILTER ssse3,  h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER ssse3,  v, 6,  8, 16
-MBEDGE_LOOPFILTER ssse3,  h, 6,  8, 16
+MBEDGE_LOOPFILTER ssse3,  v, 6,  8, 15
+MBEDGE_LOOPFILTER ssse3,  h, 6,  8, 15
 
 %define WRITE_8W   WRITE_8W_SSE4
 %ifdef m8
-MBEDGE_LOOPFILTER sse4,   h, 5, 16, 16
+MBEDGE_LOOPFILTER sse4,   h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse4,   h, 6, 16, 16
+MBEDGE_LOOPFILTER sse4,   h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse4,   h, 6,  8, 16
+MBEDGE_LOOPFILTER sse4,   h, 6,  8, 15