swscale: fix compile on ppc.

pull/2/head
Ronald S. Bultje 14 years ago
parent e758573a88
commit 93681fbd50
  1. 30
      libswscale/ppc/swscale_altivec_template.c
  2. 25
      libswscale/ppc/swscale_template.c
  3. 15
      libswscale/ppc/yuv2rgb_altivec.c
  4. 7
      libswscale/swscale_internal.h

@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
} }
static inline void static inline void
yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, int lumFilterSize, const int16_t *chrFilter,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) const int16_t **chrUSrc, const int16_t **chrVSrc,
int chrFilterSize, uint8_t *dest, uint8_t *uDest,
uint8_t *vDest, int dstW, int chrDstW)
{ {
const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
register int i, j; register int i, j;
@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0); vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
perm = vec_lvsl(0, chrSrc[j]); perm = vec_lvsl(0, chrUSrc[j]);
l1 = vec_ld(0, chrSrc[j]); l1 = vec_ld(0, chrUSrc[j]);
l1_V = vec_ld(2048 << 1, chrSrc[j]); l1_V = vec_ld(0, chrVSrc[j]);
for (i = 0; i < (chrDstW - 7); i+=8) { for (i = 0; i < (chrDstW - 7); i+=8) {
int offset = i << 2; int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]); vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]); vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
vector signed int v1 = vec_ld(offset, u); vector signed int v1 = vec_ld(offset, u);
vector signed int v2 = vec_ld(offset + 16, u); vector signed int v2 = vec_ld(offset + 16, u);
vector signed int v1_V = vec_ld(offset, v); vector signed int v1_V = vec_ld(offset, v);
vector signed int v2_V = vec_ld(offset + 16, v); vector signed int v2_V = vec_ld(offset + 16, v);
vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7] vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055] vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7]
vector signed int i1 = vec_mule(vChrFilter, ls); vector signed int i1 = vec_mule(vChrFilter, ls);
vector signed int i2 = vec_mulo(vChrFilter, ls); vector signed int i2 = vec_mulo(vChrFilter, ls);
@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
vector signed int i2_V = vec_mulo(vChrFilter, ls_V); vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
vector signed int vf1 = vec_mergeh(i1, i2); vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
vector signed int vf1_V = vec_mergeh(i1_V, i2_V); vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
vector signed int vo1 = vec_add(v1, vf1); vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2); vector signed int vo2 = vec_add(v2, vf2);
@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
l1_V = l2_V; l1_V = l2_V;
} }
for ( ; i < chrDstW; i++) { for ( ; i < chrDstW; i++) {
u[i] += chrSrc[j][i] * chrFilter[j]; u[i] += chrUSrc[j][i] * chrFilter[j];
v[i] += chrSrc[j][i + 2048] * chrFilter[j]; v[i] += chrVSrc[j][i] * chrFilter[j];
} }
} }
altivec_packIntArrayToCharArray(u, uDest, chrDstW); altivec_packIntArrayToCharArray(u, uDest, chrDstW);

@ -24,21 +24,28 @@
#endif #endif
#if COMPILE_TEMPLATE_ALTIVEC #if COMPILE_TEMPLATE_ALTIVEC
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, const int16_t **lumSrc, int lumFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
uint8_t *aDest, long dstW, long chrDstW)
{ {
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize, chrFilter, chrUSrc, chrVSrc, chrFilterSize,
dest, uDest, vDest, dstW, chrDstW); dest, uDest, vDest, dstW, chrDstW);
} }
/** /**
* vertical scale YV12 to RGB * vertical scale YV12 to RGB
*/ */
static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **lumSrc, int lumFilterSize,
const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest,
long dstW, long dstY)
{ {
/* The following list of supported dstFormat values should /* The following list of supported dstFormat values should
match what's found in the body of ff_yuv2packedX_altivec() */ match what's found in the body of ff_yuv2packedX_altivec() */
@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize, chrFilter, chrUSrc, chrVSrc, chrFilterSize,
dest, dstW, dstY); dest, dstW, dstY);
else else
yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize, chrFilter, chrUSrc, chrVSrc, chrFilterSize,
alpSrc, dest, dstW, dstY); alpSrc, dest, dstW, dstY);
} }
#endif #endif

@ -778,9 +778,10 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
void void
ff_yuv2packedX_altivec(SwsContext *c, ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
uint8_t *dest, int dstW, int dstY) uint8_t *dest, int dstW, int dstY)
{ {
int i,j; int i,j;
@ -816,9 +817,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
V = RND; V = RND;
/* extract 8 coeffs from U,V */ /* extract 8 coeffs from U,V */
for (j=0; j<chrFilterSize; j++) { for (j=0; j<chrFilterSize; j++) {
X = vec_ld (0, &chrSrc[j][i/2]); X = vec_ld (0, &chrUSrc[j][i/2]);
U = vec_mradds (X, CCoeffs[j], U); U = vec_mradds (X, CCoeffs[j], U);
X = vec_ld (0, &chrSrc[j][i/2+2048]); X = vec_ld (0, &chrVSrc[j][i/2]);
V = vec_mradds (X, CCoeffs[j], V); V = vec_mradds (X, CCoeffs[j], V);
} }
@ -894,9 +895,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
V = RND; V = RND;
/* extract 8 coeffs from U,V */ /* extract 8 coeffs from U,V */
for (j=0; j<chrFilterSize; j++) { for (j=0; j<chrFilterSize; j++) {
X = vec_ld (0, &chrSrc[j][i/2]); X = vec_ld (0, &chrUSrc[j][i/2]);
U = vec_mradds (X, CCoeffs[j], U); U = vec_mradds (X, CCoeffs[j], U);
X = vec_ld (0, &chrSrc[j][i/2+2048]); X = vec_ld (0, &chrVSrc[j][i/2]);
V = vec_mradds (X, CCoeffs[j], V); V = vec_mradds (X, CCoeffs[j], V);
} }

@ -333,9 +333,10 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
void ff_bfin_get_unscaled_swscale(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c);
void ff_yuv2packedX_altivec(SwsContext *c, void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
uint8_t *dest, int dstW, int dstY); uint8_t *dest, int dstW, int dstY);
const char *sws_format_name(enum PixelFormat format); const char *sws_format_name(enum PixelFormat format);

Loading…
Cancel
Save