|
|
|
@@ -44,7 +44,7 @@ static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize, |
|
|
|
|
for (j = 0; j < filterSize; j++) { |
|
|
|
|
unsigned int joffset=j<<1; |
|
|
|
|
unsigned int xoffset=x<<1; |
|
|
|
|
vector unsigned char perm; |
|
|
|
|
vector unsigned char av_unused perm; |
|
|
|
|
vector signed short l1,vLumFilter; |
|
|
|
|
LOAD_FILTER(vLumFilter,filter); |
|
|
|
|
vLumFilter = vec_splat(vLumFilter, 0); |
|
|
|
@@ -133,8 +133,8 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, |
|
|
|
|
case 8: |
|
|
|
|
for (i = 0; i < dstW; i++) { |
|
|
|
|
register int srcPos = filterPos[i]; |
|
|
|
|
vector unsigned char src_vF, src_v0, src_v1; |
|
|
|
|
vector unsigned char permS; |
|
|
|
|
vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; |
|
|
|
|
vector unsigned char av_unused permS; |
|
|
|
|
vector signed short src_v, filter_v; |
|
|
|
|
vector signed int val_v, val_s; |
|
|
|
|
FIRST_LOAD(src_v0, srcPos, src, permS); |
|
|
|
@@ -173,18 +173,19 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, |
|
|
|
|
|
|
|
|
|
default: |
|
|
|
|
for (i = 0; i < dstW; i++) { |
|
|
|
|
register int j, offset = i * 2 * filterSize; |
|
|
|
|
register int j, av_unused offset = i * 2 * filterSize; |
|
|
|
|
register int srcPos = filterPos[i]; |
|
|
|
|
|
|
|
|
|
vector signed int val_s, val_v = (vector signed int)vzero; |
|
|
|
|
vector signed short filter_v0R; |
|
|
|
|
vector unsigned char permF, src_v0, permS; |
|
|
|
|
vector signed short av_unused filter_v0R; |
|
|
|
|
vector unsigned char av_unused permF, av_unused src_v0, av_unused permS; |
|
|
|
|
FIRST_LOAD(filter_v0R, offset, filter, permF); |
|
|
|
|
FIRST_LOAD(src_v0, srcPos, src, permS); |
|
|
|
|
|
|
|
|
|
for (j = 0; j < filterSize - 15; j += 16) { |
|
|
|
|
vector unsigned char src_v1, src_vF; |
|
|
|
|
vector signed short filter_v1R, filter_v2R, filter_v0, filter_v1, src_vA, src_vB; |
|
|
|
|
vector unsigned char av_unused src_v1, src_vF; |
|
|
|
|
vector signed short av_unused filter_v1R, av_unused filter_v2R, |
|
|
|
|
filter_v0, filter_v1, src_vA, src_vB; |
|
|
|
|
vector signed int val_acc; |
|
|
|
|
LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF); |
|
|
|
|
src_vA = // vec_unpackh sign-extends...
|
|
|
|
@@ -201,8 +202,8 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, |
|
|
|
|
|
|
|
|
|
if (j < filterSize - 7) { |
|
|
|
|
// loading src_v0 is useless, it's already done above
|
|
|
|
|
vector unsigned char src_v1, src_vF; |
|
|
|
|
vector signed short src_v, filter_v1R, filter_v; |
|
|
|
|
vector unsigned char av_unused src_v1, src_vF; |
|
|
|
|
vector signed short src_v, av_unused filter_v1R, filter_v; |
|
|
|
|
LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF); |
|
|
|
|
src_v = // vec_unpackh sign-extends...
|
|
|
|
|
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
|
|
|
|