|
|
|
@ -55,7 +55,7 @@ |
|
|
|
|
"movq %%mm3, %%mm4 \n\t"\
|
|
|
|
|
"lea " offset "(%0), %%"REG_d" \n\t"\
|
|
|
|
|
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
|
|
ASMALIGN(4) /* FIXME Unroll? */\
|
|
|
|
|
".p2align 4 \n\t" /* FIXME Unroll? */\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
|
|
|
|
|
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
|
|
|
|
@ -93,7 +93,7 @@ |
|
|
|
|
"pxor %%mm6, %%mm6 \n\t"\
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t"\
|
|
|
|
|
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
|
|
ASMALIGN(4) \
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
|
|
|
|
|
"movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
|
|
|
|
@ -148,7 +148,7 @@ |
|
|
|
|
|
|
|
|
|
#define YSCALEYUV2YV121 \ |
|
|
|
|
"mov %2, %%"REG_a" \n\t"\
|
|
|
|
|
ASMALIGN(4) /* FIXME Unroll? */\
|
|
|
|
|
".p2align 4 \n\t" /* FIXME Unroll? */\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
|
|
|
|
|
"movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
|
|
|
|
@ -164,7 +164,7 @@ |
|
|
|
|
"pcmpeqw %%mm7, %%mm7 \n\t"\
|
|
|
|
|
"psrlw $15, %%mm7 \n\t"\
|
|
|
|
|
"psllw $6, %%mm7 \n\t"\
|
|
|
|
|
ASMALIGN(4) /* FIXME Unroll? */\
|
|
|
|
|
".p2align 4 \n\t" /* FIXME Unroll? */\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
|
|
|
|
|
"movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
|
|
|
|
@ -187,14 +187,14 @@ |
|
|
|
|
#define YSCALEYUV2PACKEDX_UV \ |
|
|
|
|
__asm__ volatile(\
|
|
|
|
|
"xor %%"REG_a", %%"REG_a" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"nop \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
|
|
|
|
|
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
|
|
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
|
|
|
|
"movq %%mm3, %%mm4 \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
|
|
|
|
|
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
|
|
|
|
@ -213,7 +213,7 @@ |
|
|
|
|
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
|
|
"movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
|
|
|
|
|
"movq "#dst1", "#dst2" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
"movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
|
|
|
|
|
"movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
|
|
|
|
@ -241,7 +241,7 @@ |
|
|
|
|
#define YSCALEYUV2PACKEDX_ACCURATE_UV \ |
|
|
|
|
__asm__ volatile(\
|
|
|
|
|
"xor %%"REG_a", %%"REG_a" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"nop \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
|
|
|
|
@ -250,7 +250,7 @@ |
|
|
|
|
"pxor %%mm5, %%mm5 \n\t"\
|
|
|
|
|
"pxor %%mm6, %%mm6 \n\t"\
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
|
|
|
|
|
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
|
|
|
|
@ -295,7 +295,7 @@ |
|
|
|
|
"pxor %%mm5, %%mm5 \n\t"\
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t"\
|
|
|
|
|
"pxor %%mm6, %%mm6 \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
|
|
|
|
|
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
|
|
|
|
@ -381,7 +381,7 @@ |
|
|
|
|
"movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
|
|
|
|
|
"movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
|
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
|
|
|
|
@ -413,7 +413,7 @@ |
|
|
|
|
|
|
|
|
|
#define REAL_YSCALEYUV2RGB_UV(index, c) \ |
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
|
|
|
|
@ -488,7 +488,7 @@ |
|
|
|
|
|
|
|
|
|
#define REAL_YSCALEYUV2PACKED1(index, c) \ |
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
|
|
|
|
@ -503,7 +503,7 @@ |
|
|
|
|
|
|
|
|
|
#define REAL_YSCALEYUV2RGB1(index, c) \ |
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
|
|
|
|
@ -552,7 +552,7 @@ |
|
|
|
|
|
|
|
|
|
#define REAL_YSCALEYUV2PACKED1b(index, c) \ |
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
|
|
|
|
@ -571,7 +571,7 @@ |
|
|
|
|
// do vertical chrominance interpolation
|
|
|
|
|
#define REAL_YSCALEYUV2RGB1b(index, c) \ |
|
|
|
|
"xor "#index", "#index" \n\t"\
|
|
|
|
|
ASMALIGN(4)\
|
|
|
|
|
".p2align 4 \n\t"\
|
|
|
|
|
"1: \n\t"\
|
|
|
|
|
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
|
|
|
|
|
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
|
|
|
|
@ -2055,7 +2055,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in |
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
"push %%"REG_BP" \n\t" // we use 7 regs here ...
|
|
|
|
|
"mov %%"REG_a", %%"REG_BP" \n\t" |
|
|
|
|
ASMALIGN(4) |
|
|
|
|
".p2align 4 \n\t" |
|
|
|
|
"1: \n\t" |
|
|
|
|
"movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
|
|
|
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
|
|
@ -2099,7 +2099,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in |
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
"push %%"REG_BP" \n\t" // we use 7 regs here ...
|
|
|
|
|
"mov %%"REG_a", %%"REG_BP" \n\t" |
|
|
|
|
ASMALIGN(4) |
|
|
|
|
".p2align 4 \n\t" |
|
|
|
|
"1: \n\t" |
|
|
|
|
"movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
|
|
|
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
|
|
@ -2150,7 +2150,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in |
|
|
|
|
dst-= counter/2; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
ASMALIGN(4) |
|
|
|
|
".p2align 4 \n\t" |
|
|
|
|
"1: \n\t" |
|
|
|
|
"mov %2, %%"REG_c" \n\t" |
|
|
|
|
"movzwl (%%"REG_c", %0), %%eax \n\t" |
|
|
|
@ -2335,7 +2335,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, |
|
|
|
|
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
|
|
|
|
"xor %%"REG_d", %%"REG_d" \n\t" // xx
|
|
|
|
|
"xorl %%ecx, %%ecx \n\t" // xalpha
|
|
|
|
|
ASMALIGN(4) |
|
|
|
|
".p2align 4 \n\t" |
|
|
|
|
"1: \n\t" |
|
|
|
|
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
|
|
|
|
|
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
|
|
|
|
@ -2475,7 +2475,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, |
|
|
|
|
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
|
|
|
|
"xor %%"REG_d", %%"REG_d" \n\t" // xx
|
|
|
|
|
"xorl %%ecx, %%ecx \n\t" // xalpha
|
|
|
|
|
ASMALIGN(4) |
|
|
|
|
".p2align 4 \n\t" |
|
|
|
|
"1: \n\t" |
|
|
|
|
"mov %0, %%"REG_S" \n\t" |
|
|
|
|
"movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
|
|
|
|
|