|
|
|
@ -151,39 +151,39 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#define OP(LOAD, STORE, INCR) \ |
|
|
|
|
do { \
|
|
|
|
|
STORE(LOAD(pixels), block); \
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
block += INCR; \
|
|
|
|
|
#define OP(LOAD, STORE) \ |
|
|
|
|
do { \
|
|
|
|
|
STORE(LOAD(pixels), block); \
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
block += line_size; \
|
|
|
|
|
} while (--h) |
|
|
|
|
|
|
|
|
|
#define OP_X2(LOAD, STORE, INCR) \ |
|
|
|
|
do { \
|
|
|
|
|
uint64_t pix1, pix2; \
|
|
|
|
|
\
|
|
|
|
|
pix1 = LOAD(pixels); \
|
|
|
|
|
pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
|
|
|
|
|
STORE(AVG2(pix1, pix2), block); \
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
block += INCR; \
|
|
|
|
|
#define OP_X2(LOAD, STORE) \ |
|
|
|
|
do { \
|
|
|
|
|
uint64_t pix1, pix2; \
|
|
|
|
|
\
|
|
|
|
|
pix1 = LOAD(pixels); \
|
|
|
|
|
pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
|
|
|
|
|
STORE(AVG2(pix1, pix2), block); \
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
block += line_size; \
|
|
|
|
|
} while (--h) |
|
|
|
|
|
|
|
|
|
#define OP_Y2(LOAD, STORE, INCR) \ |
|
|
|
|
do { \
|
|
|
|
|
uint64_t pix = LOAD(pixels); \
|
|
|
|
|
do { \
|
|
|
|
|
uint64_t next_pix; \
|
|
|
|
|
\
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
next_pix = LOAD(pixels); \
|
|
|
|
|
STORE(AVG2(pix, next_pix), block); \
|
|
|
|
|
block += INCR; \
|
|
|
|
|
pix = next_pix; \
|
|
|
|
|
} while (--h); \
|
|
|
|
|
#define OP_Y2(LOAD, STORE) \ |
|
|
|
|
do { \
|
|
|
|
|
uint64_t pix = LOAD(pixels); \
|
|
|
|
|
do { \
|
|
|
|
|
uint64_t next_pix; \
|
|
|
|
|
\
|
|
|
|
|
pixels += line_size; \
|
|
|
|
|
next_pix = LOAD(pixels); \
|
|
|
|
|
STORE(AVG2(pix, next_pix), block); \
|
|
|
|
|
block += line_size; \
|
|
|
|
|
pix = next_pix; \
|
|
|
|
|
} while (--h); \
|
|
|
|
|
} while (0) |
|
|
|
|
|
|
|
|
|
#define OP_XY2(LOAD, STORE, INCR) \ |
|
|
|
|
#define OP_XY2(LOAD, STORE) \ |
|
|
|
|
do { \
|
|
|
|
|
uint64_t pix1 = LOAD(pixels); \
|
|
|
|
|
uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
|
|
|
|
@ -208,40 +208,40 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) |
|
|
|
|
+ pix_h + npix_h; \
|
|
|
|
|
STORE(avg, block); \
|
|
|
|
|
\
|
|
|
|
|
block += INCR; \
|
|
|
|
|
block += line_size; \
|
|
|
|
|
pix_l = npix_l; \
|
|
|
|
|
pix_h = npix_h; \
|
|
|
|
|
} while (--h); \
|
|
|
|
|
} while (0) |
|
|
|
|
|
|
|
|
|
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR) \ |
|
|
|
|
static void OPNAME ## _pixels ## SUFF ## _axp \
|
|
|
|
|
(BTYPE *restrict block, const uint8_t *restrict pixels, \
|
|
|
|
|
int line_size, int h) \
|
|
|
|
|
{ \
|
|
|
|
|
if ((size_t) pixels & 0x7) { \
|
|
|
|
|
OPKIND(uldq, STORE, INCR); \
|
|
|
|
|
} else { \
|
|
|
|
|
OPKIND(ldq, STORE, INCR); \
|
|
|
|
|
} \
|
|
|
|
|
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ |
|
|
|
|
static void OPNAME ## _pixels ## SUFF ## _axp \
|
|
|
|
|
(uint8_t *restrict block, const uint8_t *restrict pixels, \
|
|
|
|
|
int line_size, int h) \
|
|
|
|
|
{ \
|
|
|
|
|
if ((size_t) pixels & 0x7) { \
|
|
|
|
|
OPKIND(uldq, STORE); \
|
|
|
|
|
} else { \
|
|
|
|
|
OPKIND(ldq, STORE); \
|
|
|
|
|
} \
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#define PIXOP(BTYPE, OPNAME, STORE, INCR) \ |
|
|
|
|
MAKE_OP(BTYPE, OPNAME, , OP, STORE, INCR); \
|
|
|
|
|
MAKE_OP(BTYPE, OPNAME, _x2, OP_X2, STORE, INCR); \
|
|
|
|
|
MAKE_OP(BTYPE, OPNAME, _y2, OP_Y2, STORE, INCR); \
|
|
|
|
|
MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR); |
|
|
|
|
#define PIXOP(OPNAME, STORE) \ |
|
|
|
|
MAKE_OP(OPNAME, , OP, STORE) \
|
|
|
|
|
MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
|
|
|
|
|
MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
|
|
|
|
|
MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) |
|
|
|
|
|
|
|
|
|
/* Rounding primitives. */ |
|
|
|
|
#define AVG2 avg2 |
|
|
|
|
#define AVG4 avg4 |
|
|
|
|
#define AVG4_ROUNDER BYTE_VEC(0x02) |
|
|
|
|
#define STORE(l, b) stq(l, b) |
|
|
|
|
PIXOP(uint8_t, put, STORE, line_size); |
|
|
|
|
PIXOP(put, STORE); |
|
|
|
|
|
|
|
|
|
#undef STORE |
|
|
|
|
#define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
|
|
|
|
PIXOP(uint8_t, avg, STORE, line_size); |
|
|
|
|
PIXOP(avg, STORE); |
|
|
|
|
|
|
|
|
|
/* Not rounding primitives. */ |
|
|
|
|
#undef AVG2 |
|
|
|
@ -252,11 +252,11 @@ PIXOP(uint8_t, avg, STORE, line_size); |
|
|
|
|
#define AVG4 avg4_no_rnd |
|
|
|
|
#define AVG4_ROUNDER BYTE_VEC(0x01) |
|
|
|
|
#define STORE(l, b) stq(l, b) |
|
|
|
|
PIXOP(uint8_t, put_no_rnd, STORE, line_size); |
|
|
|
|
PIXOP(put_no_rnd, STORE); |
|
|
|
|
|
|
|
|
|
#undef STORE |
|
|
|
|
#define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
|
|
|
|
PIXOP(uint8_t, avg_no_rnd, STORE, line_size); |
|
|
|
|
PIXOP(avg_no_rnd, STORE); |
|
|
|
|
|
|
|
|
|
void dsputil_init_alpha(void) |
|
|
|
|
{ |
|
|
|
|