|
|
|
@ -22,6 +22,8 @@ |
|
|
|
|
#include "../dsputil.h" |
|
|
|
|
#include "../simple_idct.h" |
|
|
|
|
|
|
|
|
|
/* 32-entry table defined in another translation unit; h263_v_loop_filter_mmx
 * indexes it with qscale to obtain the filter strength. */
extern const uint8_t ff_h263_loop_filter_strength[32]; |
|
|
|
|
|
|
|
|
|
/* Bitmask of multimedia extension flags; dsputil_init_mmx tests it against
 * MM_MMXEXT before installing the *_mmx2 routines. */
int mm_flags; /* multimedia extension flags */ |
|
|
|
|
|
|
|
|
|
/* pixel operations */ |
|
|
|
@ -34,6 +36,8 @@ static const uint64_t ff_pw_3 __attribute__ ((aligned(8))) = 0x0003000300030003 |
|
|
|
|
/* Four packed 16-bit words, each holding 16 (0x0010); 8-byte aligned. */
static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL; |
|
|
|
|
/* Four packed 16-bit words, each holding 15 (0x000F); 8-byte aligned. */
static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; |
|
|
|
|
|
|
|
|
|
/* Eight packed bytes, each 0xFC. h263_v_loop_filter_mmx ANDs with this mask
 * before a 16-bit "psrlw $2" so that the word shift acts as a per-byte >>2
 * (the two low bits of every byte are cleared first). */
static const uint64_t ff_pb_FC __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; |
|
|
|
|
|
|
|
|
|
/* Emits a ".balign 8" assembler directive at the point of use. */
#define JUMPALIGN() __asm __volatile (".balign 8"::) |
|
|
|
|
/* Zeroes the named register by XOR-ing it with itself; regd is pasted into
 * the instruction text, e.g. MOVQ_ZERO(mm7) emits "pxor %mm7, %mm7". */
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
|
|
|
|
|
|
|
|
@ -465,6 +469,92 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ |
|
|
|
|
dst[i+0] += src[i+0]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ |
|
|
|
|
const int strength= ff_h263_loop_filter_strength[qscale]; |
|
|
|
|
|
|
|
|
|
asm volatile( |
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
"movq %0, %%mm0 \n\t" |
|
|
|
|
"movq %0, %%mm1 \n\t" |
|
|
|
|
"movq %3, %%mm2 \n\t" |
|
|
|
|
"movq %3, %%mm3 \n\t" |
|
|
|
|
"punpcklbw %%mm7, %%mm0 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm1 \n\t" |
|
|
|
|
"punpcklbw %%mm7, %%mm2 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm3 \n\t" |
|
|
|
|
"psubw %%mm2, %%mm0 \n\t" |
|
|
|
|
"psubw %%mm3, %%mm1 \n\t" |
|
|
|
|
"movq %1, %%mm2 \n\t" |
|
|
|
|
"movq %1, %%mm3 \n\t" |
|
|
|
|
"movq %2, %%mm4 \n\t" |
|
|
|
|
"movq %2, %%mm5 \n\t" |
|
|
|
|
"punpcklbw %%mm7, %%mm2 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm3 \n\t" |
|
|
|
|
"punpcklbw %%mm7, %%mm4 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm5 \n\t" |
|
|
|
|
"psubw %%mm2, %%mm4 \n\t" |
|
|
|
|
"psubw %%mm3, %%mm5 \n\t" |
|
|
|
|
"psllw $2, %%mm4 \n\t" |
|
|
|
|
"psllw $2, %%mm5 \n\t" |
|
|
|
|
"paddw %%mm0, %%mm4 \n\t" |
|
|
|
|
"paddw %%mm1, %%mm5 \n\t" |
|
|
|
|
"pxor %%mm6, %%mm6 \n\t" |
|
|
|
|
"pcmpgtw %%mm4, %%mm6 \n\t"
|
|
|
|
|
"pcmpgtw %%mm5, %%mm7 \n\t"
|
|
|
|
|
"pxor %%mm6, %%mm4 \n\t" |
|
|
|
|
"pxor %%mm7, %%mm5 \n\t" |
|
|
|
|
"psubw %%mm6, %%mm4 \n\t"
|
|
|
|
|
"psubw %%mm7, %%mm5 \n\t"
|
|
|
|
|
"psrlw $3, %%mm4 \n\t" |
|
|
|
|
"psrlw $3, %%mm5 \n\t" |
|
|
|
|
"packuswb %%mm5, %%mm4 \n\t" //abs(d)
|
|
|
|
|
"packsswb %%mm7, %%mm6 \n\t" //sign(d)
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
"movd %4, %%mm2 \n\t" |
|
|
|
|
"punpcklbw %%mm2, %%mm2 \n\t" |
|
|
|
|
"punpcklbw %%mm2, %%mm2 \n\t" |
|
|
|
|
"punpcklbw %%mm2, %%mm2 \n\t" //2*strength
|
|
|
|
|
"psubusb %%mm4, %%mm2 \n\t" // S(2*strength - abs(d))
|
|
|
|
|
"movq %%mm2, %%mm3 \n\t" // S(2*strength - abs(d))
|
|
|
|
|
"psubusb %%mm4, %%mm3 \n\t" // S(S(2*strength - abs(d)) - abs(d))
|
|
|
|
|
"psubb %%mm3, %%mm2 \n\t" // MIN(abs(d), S(2*strength - abs(d)))
|
|
|
|
|
"movq %1, %%mm3 \n\t" |
|
|
|
|
"movq %2, %%mm4 \n\t" |
|
|
|
|
"pxor %%mm6, %%mm3 \n\t" |
|
|
|
|
"pxor %%mm6, %%mm4 \n\t" |
|
|
|
|
"paddusb %%mm2, %%mm3 \n\t" |
|
|
|
|
"psubusb %%mm2, %%mm4 \n\t" |
|
|
|
|
"pxor %%mm6, %%mm3 \n\t" |
|
|
|
|
"pxor %%mm6, %%mm4 \n\t" |
|
|
|
|
"movq %%mm3, %1 \n\t" |
|
|
|
|
"movq %%mm4, %2 \n\t" |
|
|
|
|
"paddusb %%mm2, %%mm2 \n\t" |
|
|
|
|
"packsswb %%mm1, %%mm0 \n\t" |
|
|
|
|
"pcmpgtb %%mm0, %%mm7 \n\t" |
|
|
|
|
"pxor %%mm7, %%mm0 \n\t" |
|
|
|
|
"psubb %%mm7, %%mm0 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"psubusb %%mm2, %%mm0 \n\t" |
|
|
|
|
"psubb %%mm0, %%mm1 \n\t" |
|
|
|
|
"pand %5, %%mm1 \n\t" |
|
|
|
|
"psrlw $2, %%mm1 \n\t" |
|
|
|
|
"pxor %%mm7, %%mm1 \n\t" |
|
|
|
|
"psubb %%mm7, %%mm1 \n\t" |
|
|
|
|
"movq %0, %%mm3 \n\t" |
|
|
|
|
"movq %3, %%mm4 \n\t" |
|
|
|
|
"psubb %%mm1, %%mm3 \n\t" |
|
|
|
|
"paddb %%mm1, %%mm4 \n\t" |
|
|
|
|
"movq %%mm3, %0 \n\t" |
|
|
|
|
"movq %%mm4, %3 \n\t" |
|
|
|
|
|
|
|
|
|
: "+m" (*(uint64_t*)(src - 2*stride)), |
|
|
|
|
"+m" (*(uint64_t*)(src - 1*stride)), |
|
|
|
|
"+m" (*(uint64_t*)(src + 0*stride)), |
|
|
|
|
"+m" (*(uint64_t*)(src + 1*stride)) |
|
|
|
|
: "g" (2*strength), "m"(ff_pb_FC) |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_ENCODERS |
|
|
|
|
static int pix_norm1_mmx(uint8_t *pix, int line_size) { |
|
|
|
|
int tmp; |
|
|
|
@ -1691,6 +1781,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
|
c->pix_norm1 = pix_norm1_mmx; |
|
|
|
|
c->sse[0] = sse16_mmx; |
|
|
|
|
#endif //CONFIG_ENCODERS
|
|
|
|
|
|
|
|
|
|
c->h263_v_loop_filter= h263_v_loop_filter_mmx; |
|
|
|
|
|
|
|
|
|
if (mm_flags & MM_MMXEXT) { |
|
|
|
|
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
|
|
|
|