swscale: reindent h[cy]scale_fast() and updateDitherTables().

pull/2/head
Ronald S. Bultje 14 years ago committed by Michael Niedermayer
parent c3f07903ec
commit fc72ec727e
  1. 263
      libswscale/x86/swscale_template.c

@ -2169,64 +2169,64 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
__asm__ volatile(
__asm__ volatile(
#if defined(PIC)
"mov %%"REG_b", %5 \n\t"
"mov %%"REG_b", %5 \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t"
"mov %1, %%"REG_D" \n\t"
"mov %2, %%"REG_d" \n\t"
"mov %3, %%"REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
"pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t"
"mov %1, %%"REG_D" \n\t"
"mov %2, %%"REG_d" \n\t"
"mov %3, %%"REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
#if ARCH_X86_64
#define CALL_MMX2_FILTER_CODE \
"movl (%%"REG_b"), %%esi \n\t"\
"call *%4 \n\t"\
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
"add %%"REG_S", %%"REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
"movl (%%"REG_b"), %%esi \n\t"\
"call *%4 \n\t"\
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
"add %%"REG_S", %%"REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
#else
#define CALL_MMX2_FILTER_CODE \
"movl (%%"REG_b"), %%esi \n\t"\
"call *%4 \n\t"\
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
"movl (%%"REG_b"), %%esi \n\t"\
"call *%4 \n\t"\
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
#endif /* ARCH_X86_64 */
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
#if defined(PIC)
"mov %5, %%"REG_b" \n\t"
"mov %5, %%"REG_b" \n\t"
#endif
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode)
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode)
#if defined(PIC)
,"m" (ebxsave)
,"m" (ebxsave)
#endif
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
#if !defined(PIC)
,"%"REG_b
,"%"REG_b
#endif
);
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
);
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
dst[i] = src[srcW-1]*128;
}
static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
@ -2242,54 +2242,55 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
__asm__ volatile(
__asm__ volatile(
#if defined(PIC)
"mov %%"REG_b", %6 \n\t"
"mov %%"REG_b", %6 \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t"
"mov %1, %%"REG_D" \n\t"
"mov %2, %%"REG_d" \n\t"
"mov %3, %%"REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
"xor %%"REG_a", %%"REG_a" \n\t" // i
"mov %5, %%"REG_c" \n\t" // src
"mov %1, %%"REG_D" \n\t" // buf1
"add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
"pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t"
"mov %1, %%"REG_D" \n\t"
"mov %2, %%"REG_d" \n\t"
"mov %3, %%"REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
"xor %%"REG_a", %%"REG_a" \n\t" // i
"mov %5, %%"REG_c" \n\t" // src
"mov %1, %%"REG_D" \n\t" // buf1
"add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
PREFETCH" (%%"REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t"
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
CALL_MMX2_FILTER_CODE
#if defined(PIC)
"mov %6, %%"REG_b" \n\t"
"mov %6, %%"REG_b" \n\t"
#endif
:: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode), "m" (src2)
:: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode), "m" (src2)
#if defined(PIC)
,"m" (ebxsave)
,"m" (ebxsave)
#endif
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
#if !defined(PIC)
,"%"REG_b
,"%"REG_b
#endif
);
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
dst[i] = src1[srcW-1]*128;
dst[i+VOFW] = src2[srcW-1]*128;
}
);
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
dst[i] = src1[srcW-1]*128;
dst[i+VOFW] = src2[srcW-1]*128;
}
}
#endif /* COMPILE_TEMPLATE_MMX2 */
@ -2317,62 +2318,62 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
c->blueDither= ff_dither8[dstY&1];
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
c->greenDither= ff_dither8[dstY&1];
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
}
for (i=0; i<vChrFilterSize; i+=2) {
*(const void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
}
} else {
for (i=0; i<vLumFilterSize; i++) {
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
}
c->blueDither= ff_dither8[dstY&1];
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
c->greenDither= ff_dither8[dstY&1];
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
for (i=0; i<vChrFilterSize; i++) {
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
for (i=0; i<vChrFilterSize; i+=2) {
*(const void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
}
} else {
for (i=0; i<vLumFilterSize; i++) {
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
}
}
for (i=0; i<vChrFilterSize; i++) {
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
}
}
}
#endif /* !COMPILE_TEMPLATE_MMX2 */

Loading…
Cancel
Save