diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index f5fd8606f9..c318d8018b 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -206,7 +206,7 @@ "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%ebx", "%ecx", "%edx", "%esi" */ -#define YSCALEYUV2PACKEDX \ +#define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ ASMALIGN(4)\ @@ -229,8 +229,9 @@ "paddw %%mm5, %%mm4 \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ " jnz 2b \n\t"\ -\ - "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + +#define YSCALEYUV2PACKEDX_YA(offset) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\ "movq %%mm1, %%mm7 \n\t"\ @@ -248,6 +249,10 @@ "test %%"REG_S", %%"REG_S" \n\t"\ " jnz 2b \n\t"\ +#define YSCALEYUV2PACKEDX \ + YSCALEYUV2PACKEDX_UV \ + YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET) \ + #define YSCALEYUV2PACKEDX_END \ :: "r" (&c->redDither), \ "m" (dummy), "m" (dummy), "m" (dummy),\ @@ -255,7 +260,7 @@ : "%"REG_a, "%"REG_d, "%"REG_S \ ); -#define YSCALEYUV2PACKEDX_ACCURATE \ +#define YSCALEYUV2PACKEDX_ACCURATE_UV \ __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ ASMALIGN(4)\ @@ -304,8 +309,9 @@ "paddw %%mm0, %%mm6 \n\t"\ "movq %%mm4, "U_TEMP"(%0) \n\t"\ "movq %%mm6, "V_TEMP"(%0) \n\t"\ -\ - "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + +#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "pxor %%mm1, %%mm1 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\ @@ -349,6 +355,10 @@ "movq "U_TEMP"(%0), %%mm3 \n\t"\ "movq "V_TEMP"(%0), %%mm4 \n\t"\ +#define YSCALEYUV2PACKEDX_ACCURATE \ + YSCALEYUV2PACKEDX_ACCURATE_UV \ + YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) + #define YSCALEYUV2RGBX \ "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ @@ -424,7 +434,7 @@ #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) -#define REAL_YSCALEYUV2RGB(index, c) \ +#define REAL_YSCALEYUV2RGB_UV(index, c) \ "xor "#index", "#index" \n\t"\ ASMALIGN(4)\ "1: \n\t"\ @@ -448,6 +458,8 @@ "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + +#define REAL_YSCALEYUV2RGB_YA(index, c) \ "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ @@ -460,6 +472,8 @@ "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + +#define REAL_YSCALEYUV2RGB_COEFF(c) \ "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ @@ -488,7 +502,12 @@ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ "pxor %%mm7, %%mm7 \n\t" -#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) +#define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c) + +#define YSCALEYUV2RGB(index, c) \ + REAL_YSCALEYUV2RGB_UV(index, c) \ + REAL_YSCALEYUV2RGB_YA(index, c) \ + REAL_YSCALEYUV2RGB_COEFF(c) #define REAL_YSCALEYUV2PACKED1(index, c) \ "xor "#index", "#index" \n\t"\