postproc: altivec: fix trivial cases of mixed declarations and code

This moves declarations without initialisers or with constant
initialisers to the start of their enclosing block, and wraps some
macros in do { } while (0) so that each expansion opens its own
block, thus allowing declarations within them.

Signed-off-by: Mans Rullgard <mans@mansr.com>
pull/2/head
Mans Rullgard 13 years ago
parent 37c0dc626d
commit b1bcddfb6d
  1. 123
      libpostproc/postprocess_altivec_template.c
  2. 3
      libpostproc/postprocess_template.c

@ -530,6 +530,39 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
} }
static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector signed int vsint32_8 = vec_splat_s32(8);
const vector unsigned int vuint32_4 = vec_splat_u32(4);
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permA1 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA2 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA1inc = (vector unsigned char)
{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char permA2inc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char magic = (vector unsigned char)
{0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char extractPerm = (vector unsigned char)
{0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01};
const vector unsigned char extractPermInc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01};
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
const vector unsigned char tenRight = (vector unsigned char)
{0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char eightLeft = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08};
/* /*
this code makes no assumption on src or stride. this code makes no assumption on src or stride.
One could remove the recomputation of the perm One could remove the recomputation of the perm
@ -565,6 +598,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
#undef LOAD_LINE #undef LOAD_LINE
vector unsigned char v_avg; vector unsigned char v_avg;
DECLARE_ALIGNED(16, signed int, S)[8];
{ {
const vector unsigned char trunc_perm = (vector unsigned char) const vector unsigned char trunc_perm = (vector unsigned char)
{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@ -603,7 +637,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
v_avg = vec_avg(v_min, v_max); v_avg = vec_avg(v_min, v_max);
} }
DECLARE_ALIGNED(16, signed int, S)[8];
{ {
const vector unsigned short mask1 = (vector unsigned short) const vector unsigned short mask1 = (vector unsigned short)
{0x0001, 0x0002, 0x0004, 0x0008, {0x0001, 0x0002, 0x0004, 0x0008,
@ -615,8 +648,12 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4)); const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
const vector unsigned int vuint32_1 = vec_splat_u32(1); const vector unsigned int vuint32_1 = vec_splat_u32(1);
vector signed int sumA2;
vector signed int sumB2;
vector signed int sum0, sum1, sum2, sum3, sum4;
vector signed int sum5, sum6, sum7, sum8, sum9;
#define COMPARE(i) \ #define COMPARE(i) \
vector signed int sum##i; \
do { \ do { \
const vector unsigned char cmp##i = \ const vector unsigned char cmp##i = \
(vector unsigned char)vec_cmpgt(src##i, v_avg); \ (vector unsigned char)vec_cmpgt(src##i, v_avg); \
@ -644,8 +681,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
COMPARE(9); COMPARE(9);
#undef COMPARE #undef COMPARE
vector signed int sumA2;
vector signed int sumB2;
{ {
const vector signed int sump02 = vec_mergel(sum0, sum2); const vector signed int sump02 = vec_mergel(sum0, sum2);
const vector signed int sump13 = vec_mergel(sum1, sum3); const vector signed int sump13 = vec_mergel(sum1, sum3);
@ -703,39 +738,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
tQP2[0]= c->QP/2 + 1; tQP2[0]= c->QP/2 + 1;
vector signed int vQP2 = vec_ld(0, tQP2); vector signed int vQP2 = vec_ld(0, tQP2);
vQP2 = vec_splat(vQP2, 0); vQP2 = vec_splat(vQP2, 0);
const vector signed int vsint32_8 = vec_splat_s32(8);
const vector unsigned int vuint32_4 = vec_splat_u32(4);
const vector unsigned char permA1 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA2 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA1inc = (vector unsigned char)
{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char permA2inc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char magic = (vector unsigned char)
{0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char extractPerm = (vector unsigned char)
{0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01};
const vector unsigned char extractPermInc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01};
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
const vector unsigned char tenRight = (vector unsigned char)
{0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char eightLeft = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08};
#define F_INIT(i) \ #define F_INIT(i) \
vector unsigned char tenRightM##i = tenRight; \ vector unsigned char tenRightM##i = tenRight; \
vector unsigned char permA1M##i = permA1; \ vector unsigned char permA1M##i = permA1; \
@ -777,7 +779,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ tenRightM##i = vec_sro(tenRightM##i, eightLeft); \
extractPermM##i = vec_add(extractPermM##i, extractPermInc) extractPermM##i = vec_add(extractPermM##i, extractPermInc)
#define ITER(i, j, k) \ #define ITER(i, j, k) do { \
F_INIT(i); \ F_INIT(i); \
F2(i, j, k, 0); \ F2(i, j, k, 0); \
F2(i, j, k, 1); \ F2(i, j, k, 1); \
@ -786,7 +788,8 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
F2(i, j, k, 4); \ F2(i, j, k, 4); \
F2(i, j, k, 5); \ F2(i, j, k, 5); \
F2(i, j, k, 6); \ F2(i, j, k, 6); \
F2(i, j, k, 7) F2(i, j, k, 7); \
} while (0)
ITER(0, 1, 2); ITER(0, 1, 2);
ITER(1, 2, 3); ITER(1, 2, 3);
@ -797,9 +800,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
ITER(6, 7, 8); ITER(6, 7, 8);
ITER(7, 8, 9); ITER(7, 8, 9);
const vector signed char neg1 = vec_splat_s8(-1); #define STORE_LINE(i) do { \
#define STORE_LINE(i) \
const vector unsigned char permST##i = \ const vector unsigned char permST##i = \
vec_lvsr(i * stride, srcCopy); \ vec_lvsr(i * stride, srcCopy); \
const vector unsigned char maskST##i = \ const vector unsigned char maskST##i = \
@ -809,7 +810,8 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
sA##i= vec_sel(sA##i, src##i, maskST##i); \ sA##i= vec_sel(sA##i, src##i, maskST##i); \
sB##i= vec_sel(src##i, sB##i, maskST##i); \ sB##i= vec_sel(src##i, sB##i, maskST##i); \
vec_st(sA##i, i * stride, srcCopy); \ vec_st(sA##i, i * stride, srcCopy); \
vec_st(sB##i, i * stride + 16, srcCopy) vec_st(sB##i, i * stride + 16, srcCopy); \
} while (0)
STORE_LINE(1); STORE_LINE(1);
STORE_LINE(2); STORE_LINE(2);
@ -832,6 +834,10 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{ {
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
const vector signed int zero = vec_splat_s32(0); const vector signed int zero = vec_splat_s32(0);
const vector signed short vsint16_1 = vec_splat_s16(1); const vector signed short vsint16_1 = vec_splat_s16(1);
vector signed int v_dp = zero; vector signed int v_dp = zero;
@ -872,11 +878,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
LOAD_LINE(tempBlurred, 7); LOAD_LINE(tempBlurred, 7);
#undef LOAD_LINE #undef LOAD_LINE
#define ACCUMULATE_DIFFS(i) \ #define ACCUMULATE_DIFFS(i) do { \
vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \
v_srcAss##i); \ v_srcAss##i); \
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp); \
} while (0)
ACCUMULATE_DIFFS(0); ACCUMULATE_DIFFS(0);
ACCUMULATE_DIFFS(1); ACCUMULATE_DIFFS(1);
@ -938,13 +945,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_4 = vec_splat_s16(4); const vector signed short vsint16_4 = vec_splat_s16(4);
const vector unsigned short vuint16_3 = vec_splat_u16(3); const vector unsigned short vuint16_3 = vec_splat_u16(3);
#define OP(i) \ #define OP(i) do { \
const vector signed short v_temp##i = \ const vector signed short v_temp##i = \
vec_mladd(v_tempBlurredAss##i, \ vec_mladd(v_tempBlurredAss##i, \
vsint16_7, v_srcAss##i); \ vsint16_7, v_srcAss##i); \
const vector signed short v_temp2##i = \ const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_4); \ vec_add(v_temp##i, vsint16_4); \
v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3);\
} while (0)
OP(0); OP(0);
OP(1); OP(1);
@ -959,13 +967,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_3 = vec_splat_s16(3); const vector signed short vsint16_3 = vec_splat_s16(3);
const vector signed short vsint16_2 = vec_splat_s16(2); const vector signed short vsint16_2 = vec_splat_s16(2);
#define OP(i) \ #define OP(i) do { \
const vector signed short v_temp##i = \ const vector signed short v_temp##i = \
vec_mladd(v_tempBlurredAss##i, \ vec_mladd(v_tempBlurredAss##i, \
vsint16_3, v_srcAss##i); \ vsint16_3, v_srcAss##i); \
const vector signed short v_temp2##i = \ const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_2); \ vec_add(v_temp##i, vsint16_2); \
v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2); \
} while (0)
OP(0); OP(0);
OP(1); OP(1);
@ -979,11 +988,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
} }
} }
const vector signed char neg1 = vec_splat_s8(-1); #define PACK_AND_STORE(src, i) do { \
const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
#define PACK_AND_STORE(src, i) \
const vector unsigned char perms##src##i = \ const vector unsigned char perms##src##i = \
vec_lvsr(i * stride, src); \ vec_lvsr(i * stride, src); \
const vector unsigned char vf##src##i = \ const vector unsigned char vf##src##i = \
@ -999,7 +1004,8 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector unsigned char svB##src##i = \ const vector unsigned char svB##src##i = \
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
vec_st(svA##src##i, i * stride, src); \ vec_st(svA##src##i, i * stride, src); \
vec_st(svB##src##i, i * stride + 16, src) vec_st(svB##src##i, i * stride + 16, src); \
} while (0)
PACK_AND_STORE(src, 0); PACK_AND_STORE(src, 0);
PACK_AND_STORE(src, 1); PACK_AND_STORE(src, 1);
@ -1127,6 +1133,7 @@ static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst,
static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
const vector unsigned char zero = vec_splat_u8(0); const vector unsigned char zero = vec_splat_u8(0);
const vector signed char neg1 = vec_splat_s8(-1);
#define LOAD_DOUBLE_LINE(i, j) \ #define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char src##i = vec_ld(i * 16, src); \ vector unsigned char src##i = vec_ld(i * 16, src); \
@ -1187,8 +1194,7 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
temp7 = vec_mergel(tempD, tempL); temp7 = vec_mergel(tempD, tempL);
const vector signed char neg1 = vec_splat_s8(-1); #define STORE_DOUBLE_LINE(i, j) do { \
#define STORE_DOUBLE_LINE(i, j) \
vector unsigned char dstA##i = vec_ld(i * stride, dst); \ vector unsigned char dstA##i = vec_ld(i * stride, dst); \
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
vector unsigned char dstA##j = vec_ld(j * stride, dst); \ vector unsigned char dstA##j = vec_ld(j * stride, dst); \
@ -1206,7 +1212,8 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
vec_st(dstAF##i, i * stride, dst); \ vec_st(dstAF##i, i * stride, dst); \
vec_st(dstBF##i, i * stride + 16, dst); \ vec_st(dstBF##i, i * stride + 16, dst); \
vec_st(dstAF##j, j * stride, dst); \ vec_st(dstAF##j, j * stride, dst); \
vec_st(dstBF##j, j * stride + 16, dst) vec_st(dstBF##j, j * stride + 16, dst); \
} while (0)
STORE_DOUBLE_LINE(0,1); STORE_DOUBLE_LINE(0,1);
STORE_DOUBLE_LINE(2,3); STORE_DOUBLE_LINE(2,3);

@ -3518,9 +3518,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
else if(mode & H_DEBLOCK){ else if(mode & H_DEBLOCK){
#if HAVE_ALTIVEC #if HAVE_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
int t;
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
const int t=vertClassify_altivec(tempBlock-48, 16, &c); t = vertClassify_altivec(tempBlock-48, 16, &c);
if(t==1) { if(t==1) {
doVertLowPass_altivec(tempBlock-48, 16, &c); doVertLowPass_altivec(tempBlock-48, 16, &c);
transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);

Loading…
Cancel
Save