From 2a180c69eacdc6854957aabae3b0e3ee4d4fd774 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 26 Jul 2010 14:00:15 +0000 Subject: [PATCH] Save a register (or regsize of stackspace for x86-32) for the no-loop mbedge loopfilter functions, by re-using space that holds a variable that we no longer need. Originally committed as revision 24510 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/x86/vp8dsp.asm | 40 +++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index a67c5bcc79..b3070825f1 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -2200,11 +2200,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 ; align stack mov stack_reg, rsp ; backup stack pointer and rsp, ~(mmsize-1) ; align stack +%ifidn %2, sse2 + sub rsp, mmsize * 7 +%else sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr ; [3]=hev() result ; [4]=filter tmp result ; [5]/[6] = p2/q2 backup ; [7]=lim_res sign result +%endif %define flim_E [rsp] %define flim_I [rsp+mmsize] @@ -2215,7 +2219,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define q0backup [rsp+mmsize*4] %define p2backup [rsp+mmsize*5] %define q2backup [rsp+mmsize*6] +%ifidn %2, sse2 +%define lim_sign [rsp] +%else %define lim_sign [rsp+mmsize*7] +%endif mova flim_E, m0 mova flim_I, m1 @@ -2232,7 +2240,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define q0backup m8 %define p2backup m13 %define q2backup m14 -%define lim_sign m15 +%define lim_sign m9 ; splat function arguments SPLATB_REG flim_E, E_reg, m7 ; E @@ -2638,8 +2646,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 pmullw m1, [pw_9] paddw m6, m7 paddw m1, m7 -%ifdef m15 - SWAP 7, 15 +%ifdef m9 + SWAP 7, 9 %else mova m7, lim_sign %endif @@ -2749,29 +2757,29 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 INIT_XMM %define SPLATB_REG SPLATB_REG_SSE2 %define WRITE_8W WRITE_8W_SSE2 -MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 +MBEDGE_LOOPFILTER sse2, v, 5, 16, 15 %ifdef m8 -MBEDGE_LOOPFILTER sse2, h, 5, 16, 16 +MBEDGE_LOOPFILTER sse2, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 +MBEDGE_LOOPFILTER sse2, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 -MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 +MBEDGE_LOOPFILTER sse2, v, 6, 8, 15 +MBEDGE_LOOPFILTER sse2, h, 6, 8, 15 %define SPLATB_REG SPLATB_REG_SSSE3 -MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 +MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15 %ifdef m8 -MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 +MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16 +MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16 -MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16 +MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15 +MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15 %define WRITE_8W WRITE_8W_SSE4 %ifdef m8 -MBEDGE_LOOPFILTER sse4, h, 5, 16, 16 +MBEDGE_LOOPFILTER sse4, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER sse4, h, 6, 16, 16 +MBEDGE_LOOPFILTER sse4, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER sse4, h, 6, 8, 16 +MBEDGE_LOOPFILTER sse4, h, 6, 8, 15