|
|
|
@ -481,208 +481,6 @@ DECLARE_FUNCTION(3, 1) |
|
|
|
|
DECLARE_FUNCTION(3, 2) |
|
|
|
|
DECLARE_FUNCTION(3, 3) |
|
|
|
|
|
|
|
|
|
/**
 * DC-only path for a 4x4 block: scale block[0] and add the result to a
 * 4x4 area of 8-bit samples with unsigned saturation.
 *
 * @param dest     first sample of the top row to modify
 * @param linesize distance in bytes between the starts of consecutive rows
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;

    /*
     * Everything lives in ONE asm statement: the compiler gives no guarantee
     * that MMX register contents survive from one asm statement to the next,
     * so the broadcast of dc and its use must not be split.
     *
     * mm0 = max(dc, 0) and mm1 = max(-dc, 0), each replicated to every byte
     * (packuswb clamps negative words to 0).  Adding mm0 and subtracting mm1
     * with byte saturation therefore applies a signed offset clamped to
     * [0, 255].
     */
    __asm__ volatile(
        "movd       %[dc], %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        "movd     %[row0], %%mm2   \n\t"
        "movd     %[row1], %%mm3   \n\t"
        "movd     %[row2], %%mm4   \n\t"
        "movd     %[row3], %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movd       %%mm2, %[row0] \n\t"
        "movd       %%mm3, %[row1] \n\t"
        "movd       %%mm4, %[row2] \n\t"
        "movd       %%mm5, %[row3] \n\t"
        : [row0] "+m"(*(uint32_t *)(dest + 0 * linesize)),
          [row1] "+m"(*(uint32_t *)(dest + 1 * linesize)),
          [row2] "+m"(*(uint32_t *)(dest + 2 * linesize)),
          [row3] "+m"(*(uint32_t *)(dest + 3 * linesize))
        : [dc] "r"(dc)
        : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5"
    );
}
|
|
|
|
|
|
|
|
|
/*
 * Add the signed offset dc to 4 rows of 4 samples starting at dest, with
 * unsigned byte saturation.  The asm statement is self-contained: it
 * broadcasts dc itself instead of relying on MMX register contents left
 * behind by an earlier asm statement.
 */
static inline void vc1_dc_add_4x4_rows_mmxext(uint8_t *dest, int linesize,
                                              int dc)
{
    /*
     * mm0 = max(dc, 0) and mm1 = max(-dc, 0) in every byte (packuswb clamps
     * negative words to 0); add the former and subtract the latter.
     */
    __asm__ volatile(
        "movd       %[dc], %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        "movd     %[row0], %%mm2   \n\t"
        "movd     %[row1], %%mm3   \n\t"
        "movd     %[row2], %%mm4   \n\t"
        "movd     %[row3], %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movd       %%mm2, %[row0] \n\t"
        "movd       %%mm3, %[row1] \n\t"
        "movd       %%mm4, %[row2] \n\t"
        "movd       %%mm5, %[row3] \n\t"
        : [row0] "+m"(*(uint32_t *)(dest + 0 * linesize)),
          [row1] "+m"(*(uint32_t *)(dest + 1 * linesize)),
          [row2] "+m"(*(uint32_t *)(dest + 2 * linesize)),
          [row3] "+m"(*(uint32_t *)(dest + 3 * linesize))
        : [dc] "r"(dc)
        : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5"
    );
}

/**
 * DC-only path for a block 4 samples wide and 8 rows high: scale block[0]
 * and add the result to the samples with unsigned saturation.
 *
 * @param dest     first sample of the top row to modify
 * @param linesize distance in bytes between the starts of consecutive rows
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    /* Upper four rows, then lower four rows. */
    vc1_dc_add_4x4_rows_mmxext(dest,                linesize, dc);
    vc1_dc_add_4x4_rows_mmxext(dest + 4 * linesize, linesize, dc);
}
|
|
|
|
|
|
|
|
|
/**
 * DC-only path for a block 8 samples wide and 4 rows high: scale block[0]
 * and add the result to the samples with unsigned saturation.
 *
 * @param dest     first sample of the top row to modify
 * @param linesize distance in bytes between the starts of consecutive rows
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];

    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    /*
     * One self-contained asm statement: MMX register contents are not
     * guaranteed to survive between separate asm statements.
     *
     * The row operands are typed uint64_t because movq reads and writes
     * 8 bytes; a narrower type would tell the compiler that only part of
     * each row is accessed.
     *
     * mm0 = max(dc, 0) and mm1 = max(-dc, 0) in every byte (packuswb clamps
     * negative words to 0); add the former and subtract the latter.
     */
    __asm__ volatile(
        "movd       %[dc], %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        "movq     %[row0], %%mm2   \n\t"
        "movq     %[row1], %%mm3   \n\t"
        "movq     %[row2], %%mm4   \n\t"
        "movq     %[row3], %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movq       %%mm2, %[row0] \n\t"
        "movq       %%mm3, %[row1] \n\t"
        "movq       %%mm4, %[row2] \n\t"
        "movq       %%mm5, %[row3] \n\t"
        : [row0] "+m"(*(uint64_t *)(dest + 0 * linesize)),
          [row1] "+m"(*(uint64_t *)(dest + 1 * linesize)),
          [row2] "+m"(*(uint64_t *)(dest + 2 * linesize)),
          [row3] "+m"(*(uint64_t *)(dest + 3 * linesize))
        : [dc] "r"(dc)
        : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5"
    );
}
|
|
|
|
|
|
|
|
|
/*
 * Add the signed offset dc to 4 rows of 8 samples starting at dest, with
 * unsigned byte saturation.  The asm statement is self-contained: it
 * broadcasts dc itself instead of relying on MMX register contents left
 * behind by an earlier asm statement.
 */
static inline void vc1_dc_add_8x4_rows_mmxext(uint8_t *dest, int linesize,
                                              int dc)
{
    /*
     * Row operands are uint64_t to match the 8-byte movq accesses.
     * mm0 = max(dc, 0) and mm1 = max(-dc, 0) in every byte (packuswb clamps
     * negative words to 0); add the former and subtract the latter.
     */
    __asm__ volatile(
        "movd       %[dc], %%mm0   \n\t"
        "pshufw $0, %%mm0, %%mm0   \n\t"
        "pxor       %%mm1, %%mm1   \n\t"
        "psubw      %%mm0, %%mm1   \n\t"
        "packuswb   %%mm0, %%mm0   \n\t"
        "packuswb   %%mm1, %%mm1   \n\t"
        "movq     %[row0], %%mm2   \n\t"
        "movq     %[row1], %%mm3   \n\t"
        "movq     %[row2], %%mm4   \n\t"
        "movq     %[row3], %%mm5   \n\t"
        "paddusb    %%mm0, %%mm2   \n\t"
        "paddusb    %%mm0, %%mm3   \n\t"
        "paddusb    %%mm0, %%mm4   \n\t"
        "paddusb    %%mm0, %%mm5   \n\t"
        "psubusb    %%mm1, %%mm2   \n\t"
        "psubusb    %%mm1, %%mm3   \n\t"
        "psubusb    %%mm1, %%mm4   \n\t"
        "psubusb    %%mm1, %%mm5   \n\t"
        "movq       %%mm2, %[row0] \n\t"
        "movq       %%mm3, %[row1] \n\t"
        "movq       %%mm4, %[row2] \n\t"
        "movq       %%mm5, %[row3] \n\t"
        : [row0] "+m"(*(uint64_t *)(dest + 0 * linesize)),
          [row1] "+m"(*(uint64_t *)(dest + 1 * linesize)),
          [row2] "+m"(*(uint64_t *)(dest + 2 * linesize)),
          [row3] "+m"(*(uint64_t *)(dest + 3 * linesize))
        : [dc] "r"(dc)
        : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5"
    );
}

/**
 * DC-only path for an 8x8 block: scale block[0] and add the result to an
 * 8x8 area of 8-bit samples with unsigned saturation.
 *
 * @param dest     first sample of the top row to modify
 * @param linesize distance in bytes between the starts of consecutive rows
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    /* Upper four rows, then lower four rows. */
    vc1_dc_add_8x4_rows_mmxext(dest,                linesize, dc);
    vc1_dc_add_8x4_rows_mmxext(dest + 4 * linesize, linesize, dc);
}
|
|
|
|
|
|
|
|
|
/*
 * Store the mspel function for position (X, Y) in both pixel tables of dsp:
 * table [1] receives OP##vc1_mspel_mc<X><Y><INSN>, table [0] receives the
 * matching _16 variant.  X and Y must be plain digit tokens because they are
 * pasted into the function names.  Wrapped in do { } while (0) so that the
 * two assignments behave as a single statement (e.g. under an unbraced if).
 */
#define FN_ASSIGN(OP, X, Y, INSN)                                           \
    do {                                                                    \
        dsp->OP##vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] =                   \
            OP##vc1_mspel_mc##X##Y##INSN;                                   \
        dsp->OP##vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] =                   \
            OP##vc1_mspel_mc##X##Y##_16##INSN;                              \
    } while (0)
|
|
|
@ -729,10 +527,5 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp) |
|
|
|
|
FN_ASSIGN(avg_, 3, 1, _mmxext); |
|
|
|
|
FN_ASSIGN(avg_, 3, 2, _mmxext); |
|
|
|
|
FN_ASSIGN(avg_, 3, 3, _mmxext); |
|
|
|
|
|
|
|
|
|
dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext; |
|
|
|
|
dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext; |
|
|
|
|
dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext; |
|
|
|
|
dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext; |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL */ |
|
|
|
|