|
|
|
@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) |
|
|
|
|
{ |
|
|
|
|
int i; |
|
|
|
|
asm volatile("pxor %%mm7, %%mm7":); |
|
|
|
|
for(i=0; i<blocksize; i+=2) { |
|
|
|
|
asm volatile( |
|
|
|
|
"movq %0, %%mm0 \n\t" |
|
|
|
|
"movq %1, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm1, %%mm3 \n\t" |
|
|
|
|
"pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
|
|
|
|
|
"pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
|
|
|
|
|
"pslld $31, %%mm2 \n\t" // keep only the sign bit
|
|
|
|
|
"pxor %%mm2, %%mm1 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
|
"pand %%mm1, %%mm3 \n\t" |
|
|
|
|
"pandn %%mm1, %%mm4 \n\t" |
|
|
|
|
"pfadd %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
|
|
|
|
|
"pfsub %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
|
|
|
|
|
"movq %%mm3, %1 \n\t" |
|
|
|
|
"movq %%mm0, %0 \n\t" |
|
|
|
|
:"+m"(mag[i]), "+m"(ang[i]) |
|
|
|
|
::"memory" |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
asm volatile("emms"); |
|
|
|
|
} |
|
|
|
|
static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize) |
|
|
|
|
{ |
|
|
|
|
int i; |
|
|
|
|
for(i=0; i<blocksize; i+=4) { |
|
|
|
|
asm volatile( |
|
|
|
|
"movaps %0, %%xmm0 \n\t" |
|
|
|
|
"movaps %1, %%xmm1 \n\t" |
|
|
|
|
"pxor %%xmm2, %%xmm2 \n\t" |
|
|
|
|
"pxor %%xmm3, %%xmm3 \n\t" |
|
|
|
|
"cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
|
|
|
|
|
"cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
|
|
|
|
|
"pslld $31, %%xmm2 \n\t" // keep only the sign bit
|
|
|
|
|
"pxor %%xmm2, %%xmm1 \n\t" |
|
|
|
|
"movaps %%xmm3, %%xmm4 \n\t" |
|
|
|
|
"pand %%xmm1, %%xmm3 \n\t" |
|
|
|
|
"pandn %%xmm1, %%xmm4 \n\t" |
|
|
|
|
"addps %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
|
|
|
|
|
"subps %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
|
|
|
|
|
"movaps %%xmm3, %1 \n\t" |
|
|
|
|
"movaps %%xmm0, %0 \n\t" |
|
|
|
|
:"+m"(mag[i]), "+m"(ang[i]) |
|
|
|
|
::"memory" |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_SNOW_ENCODER |
|
|
|
|
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); |
|
|
|
|
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); |
|
|
|
@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
|
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
if(mm_flags & MM_SSE2) |
|
|
|
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2; |
|
|
|
|
else if(mm_flags & MM_SSE) |
|
|
|
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_ENCODERS |
|
|
|
|