From 2494bdd90d594fe7e5263d26287dbb2f24ec1d32 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Wed, 9 Aug 2006 06:33:49 +0000 Subject: [PATCH] gcc 2.95 and 3.4.x on x86 32bit without fomit-frame-pointer can't even find 5 registers for asm input. 0.5% slower vorbis. Originally committed as revision 5964 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/fft_3dn2.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c index 80dece700d..24d7799d58 100644 --- a/libavcodec/i386/fft_3dn2.c +++ b/libavcodec/i386/fft_3dn2.c @@ -154,20 +154,23 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { + // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it asm volatile( - "movd %1, %%mm0 \n\t" - "movd %3, %%mm1 \n\t" - "punpckldq %2, %%mm0 \n\t" - "punpckldq %4, %%mm1 \n\t" + "movd %0, %%mm0 \n\t" + "movd %2, %%mm1 \n\t" + "punpckldq %1, %%mm0 \n\t" + "punpckldq %3, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "pfmul %%mm1, %%mm0 \n\t" "pswapd %%mm1, %%mm1 \n\t" "pfmul %%mm1, %%mm2 \n\t" "pfpnacc %%mm2, %%mm0 \n\t" + ::"m"(in2[-2*k]), "m"(in1[2*k]), + "m"(tcos[k]), "m"(tsin[k]) + ); + asm volatile( "movq %%mm0, %0 \n\t" :"=m"(z[revtab[k]]) - :"m"(in2[-2*k]), "m"(in1[2*k]), - "m"(tcos[k]), "m"(tsin[k]) ); } @@ -190,11 +193,15 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, ); } + z += n8; asm volatile("movd %0, %%mm7" ::"r"(1<<31)); for(k = 0; k < n8; k++) { asm volatile( - "movq %4, %%mm0 \n\t" - "pswapd %5, %%mm1 \n\t" + "movq %0, %%mm0 \n\t" + "pswapd %1, %%mm1 \n\t" + ::"m"(z[k]), "m"(z[-1-k]) + ); + asm volatile( "movq %%mm0, %%mm2 \n\t" "pxor %%mm7, %%mm2 \n\t" "punpckldq %%mm1, %%mm2 \n\t" @@ -209,8 +216,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re } :"=m"(output[2*k]), "=m"(output[n2-2-2*k]), "=m"(output[n2+2*k]), "=m"(output[n-2-2*k]) - :"m"(z[n8+k]), "m"(z[n8-1-k]) - :"memory" + ::"memory" ); } asm volatile("emms");