@@ -1310,29 +1310,28 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);

int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
int sum;
register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0);
register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
int sum;
register const_vector unsigned char vzero =
(const_vector unsigned char)vec_splat_u8(0);
register vector signed short temp0, temp1, temp2, temp3, temp4,
temp5, temp6, temp7;
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
{
register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
register const_vector signed short vprod3 = (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
register const_vector signed short vprod1 =(const_vector signed short)
AVV( 1,-1, 1,-1, 1,-1, 1,-1);
register const_vector signed short vprod2 =(const_vector signed short)
AVV( 1, 1,-1,-1, 1, 1,-1,-1);
register const_vector signed short vprod3 =(const_vector signed short)
AVV( 1, 1, 1, 1,-1,-1,-1,-1);
register const_vector unsigned char perm1 = (const_vector unsigned char)
AVV(0x02, 0x03, 0x00, 0x01,
0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09,
0x0E, 0x0F, 0x0C, 0x0D);
AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
register const_vector unsigned char perm2 = (const_vector unsigned char)
AVV(0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F,
0x08, 0x09, 0x0A, 0x0B);
AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
register const_vector unsigned char perm3 = (const_vector unsigned char)
AVV(0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07);
AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);

#define ONEITERBUTTERFLY(i, res) \
{ \
@@ -1443,45 +1442,46 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
*/

static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
int sum;
register vector signed short
temp0 REG_v(v0),
temp1 REG_v(v1),
temp2 REG_v(v2),
temp3 REG_v(v3),
temp4 REG_v(v4),
temp5 REG_v(v5),
temp6 REG_v(v6),
temp7 REG_v(v7);
register vector signed short
temp0S REG_v(v8),
temp1S REG_v(v9),
temp2S REG_v(v10),
temp3S REG_v(v11),
temp4S REG_v(v12),
temp5S REG_v(v13),
temp6S REG_v(v14),
temp7S REG_v(v15);
register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0);
int sum;
register vector signed short
temp0 REG_v(v0),
temp1 REG_v(v1),
temp2 REG_v(v2),
temp3 REG_v(v3),
temp4 REG_v(v4),
temp5 REG_v(v5),
temp6 REG_v(v6),
temp7 REG_v(v7);
register vector signed short
temp0S REG_v(v8),
temp1S REG_v(v9),
temp2S REG_v(v10),
temp3S REG_v(v11),
temp4S REG_v(v12),
temp5S REG_v(v13),
temp6S REG_v(v14),
temp7S REG_v(v15);
register const_vector unsigned char vzero REG_v(v31)=
(const_vector unsigned char)vec_splat_u8(0);
{
register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char)
AVV(0x02, 0x03, 0x00, 0x01,
0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09,
0x0E, 0x0F, 0x0C, 0x0D);
register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char)
AVV(0x04, 0x05, 0x06, 0x07,
0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F,
0x08, 0x09, 0x0A, 0x0B);
register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char)
AVV(0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07);
register const_vector signed short vprod1 REG_v(v16)=
(const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
register const_vector signed short vprod2 REG_v(v17)=
(const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
register const_vector signed short vprod3 REG_v(v18)=
(const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
register const_vector unsigned char perm1 REG_v(v19)=
(const_vector unsigned char)
AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
register const_vector unsigned char perm2 REG_v(v20)=
(const_vector unsigned char)
AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
register const_vector unsigned char perm3 REG_v(v21)=
(const_vector unsigned char)
AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);

#define ONEITERBUTTERFLY(i, res1, res2) \
{ \
@@ -1642,27 +1642,27 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,

int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
int score;
int score;
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
if (h==16) {
dst += 8*stride;
src += 8*stride;
score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
}
score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
if (h==16) {
dst += 8*stride;
src += 8*stride;
score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
}
POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
return score;
return score;
}

int has_altivec(void)
{
#ifdef __AMIGAOS4__
ULONG result = 0;
extern struct ExecIFace *IExec;
ULONG result = 0;
extern struct ExecIFace *IExec;

IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
if (result == VECTORTYPE_ALTIVEC) return 1;
return 0;
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
if (result == VECTORTYPE_ALTIVEC) return 1;
return 0;
#else /* __AMIGAOS4__ */

#ifdef CONFIG_DARWIN
@@ -1757,75 +1757,66 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
}
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
register int i;
register vector unsigned char
pixelsv1, pixelsv2,
pixelsavg;
register vector unsigned char
blockv, temp1, temp2, blocktemp;
register vector unsigned short
pixelssum1, pixelssum2, temp3;
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);

temp1 = vec_ld(0, pixels);
temp2 = vec_ld(16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
{
pixelsv2 = temp2;
}
else
{
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
}
pixelsv1 = vec_mergeh(vczero, pixelsv1);
pixelsv2 = vec_mergeh(vczero, pixelsv2);
pixelssum1 = vec_add((vector unsigned short)pixelsv1,
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vctwo);
register int i;
register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
register vector unsigned char blockv, temp1, temp2, blocktemp;
register vector unsigned short pixelssum1, pixelssum2, temp3;

register const_vector unsigned char vczero = (const_vector unsigned char)
vec_splat_u8(0);
register const_vector unsigned short vctwo = (const_vector unsigned short)
vec_splat_u16(2);

temp1 = vec_ld(0, pixels);
temp2 = vec_ld(16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
pixelsv2 = temp2;
} else {
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
}
pixelsv1 = vec_mergeh(vczero, pixelsv1);
pixelsv2 = vec_mergeh(vczero, pixelsv2);
pixelssum1 = vec_add((vector unsigned short)pixelsv1,
(vector unsigned short)pixelsv2);
pixelssum1 = vec_add(pixelssum1, vctwo);

POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
for (i = 0; i < h ; i++) {
int rightside = ((unsigned long)block & 0x0000000F);
blockv = vec_ld(0, block);

temp1 = vec_ld(line_size, pixels);
temp2 = vec_ld(line_size + 16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
{
pixelsv2 = temp2;
}
else
{
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
}
for (i = 0; i < h ; i++) {
int rightside = ((unsigned long)block & 0x0000000F);
blockv = vec_ld(0, block);

temp1 = vec_ld(line_size, pixels);
temp2 = vec_ld(line_size + 16, pixels);
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
{
pixelsv2 = temp2;
} else {
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
}

pixelsv1 = vec_mergeh(vczero, pixelsv1);
pixelsv2 = vec_mergeh(vczero, pixelsv2);
pixelssum2 = vec_add((vector unsigned short)pixelsv1,
(vector unsigned short)pixelsv2);
temp3 = vec_add(pixelssum1, pixelssum2);
temp3 = vec_sra(temp3, vctwo);
pixelssum1 = vec_add(pixelssum2, vctwo);
pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

if (rightside) {
blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
} else {
blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
}

blockv = vec_avg(blocktemp, blockv);
vec_st(blockv, 0, block);

pixelsv1 = vec_mergeh(vczero, pixelsv1);
pixelsv2 = vec_mergeh(vczero, pixelsv2);
pixelssum2 = vec_add((vector unsigned short)pixelsv1,
(vector unsigned short)pixelsv2);
temp3 = vec_add(pixelssum1, pixelssum2);
temp3 = vec_sra(temp3, vctwo);
pixelssum1 = vec_add(pixelssum2, vctwo);
pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

if (rightside)
{
blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
}
else
{
blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
}

blockv = vec_avg(blocktemp, blockv);
vec_st(blockv, 0, block);

block += line_size;
pixels += line_size;
}
block += line_size;
pixels += line_size;
}

POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */