|
|
|
@ -230,150 +230,6 @@ static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int l |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
|
|
|
|
|
/*
 * PIXOP2(OPNAME, OP) -- 64-bit-word variant.
 *
 * Expands to a family of static functions whose names start with OPNAME.
 * Each function walks rows, advancing both `block` and `pixels` by
 * `line_size` bytes per row, computes one 64-bit word per row from the
 * source and hands it to OP(destination_lvalue, value):
 *
 *   _pixels               the source word itself
 *   _no_rnd_pixels_x2_c   per-byte average of pixels[0..7] and pixels[1..8],
 *                         rounded down
 *   _pixels_x2_c          same pair, rounded up
 *   _no_rnd_pixels_y2_c   per-byte average of the current row and the row
 *                         line_size bytes further on, rounded down
 *   _pixels_y2_c          same pair, rounded up
 *   _pixels_xy2_c         per-byte (p00 + p01 + p10 + p11 + 2) >> 2
 *   _no_rnd_pixels_xy2_c  per-byte (p00 + p01 + p10 + p11 + 1) >> 2
 *
 * The x2/xy2 forms read 9 source bytes per row, and the y2/xy2 forms read one
 * row more than they write.  The xy2 loops emit two rows per iteration, so an
 * odd `h` produces h + 1 output rows.
 *
 * The destination is written through a uint64_t lvalue built from `block`.
 * NOTE(review): this presumes `block` is suitably aligned for that access and
 * that AV_RN64 (defined outside this view) performs a 64-bit load of its
 * pointer argument -- confirm against its definition.
 *
 * The trailing CALL_2X_PIXELS lines derive the "16" entry points.  The plain
 * copy wrapper is built from OPNAME ## _pixels, the only plain-copy function
 * this macro defines; the earlier spelling OPNAME ## _pixels_c named nothing
 * defined here.
 * NOTE(review): CALL_2X_PIXELS is defined outside this view and is assumed to
 * take (new_function, existing_function, byte_offset) -- confirm.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for (row = 0; row < h; row++) {\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for (row = 0; row < h; row++) {\
        const uint64_t left  = AV_RN64(pixels    );\
        const uint64_t right = AV_RN64(pixels + 1);\
        /* common bits + half of the differing bits: average rounded down */\
        OP(*((uint64_t*)block), (left & right) + (((left ^ right) & 0xFEFEFEFEFEFEFEFEULL) >> 1));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for (row = 0; row < h; row++) {\
        const uint64_t left  = AV_RN64(pixels    );\
        const uint64_t right = AV_RN64(pixels + 1);\
        /* all set bits - half of the differing bits: average rounded up */\
        OP(*((uint64_t*)block), (left | right) - (((left ^ right) & 0xFEFEFEFEFEFEFEFEULL) >> 1));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for (row = 0; row < h; row++) {\
        const uint64_t upper = AV_RN64(pixels            );\
        const uint64_t lower = AV_RN64(pixels + line_size);\
        OP(*((uint64_t*)block), (upper & lower) + (((upper ^ lower) & 0xFEFEFEFEFEFEFEFEULL) >> 1));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for (row = 0; row < h; row++) {\
        const uint64_t upper = AV_RN64(pixels            );\
        const uint64_t lower = AV_RN64(pixels + line_size);\
        OP(*((uint64_t*)block), (upper | lower) - (((upper ^ lower) & 0xFEFEFEFEFEFEFEFEULL) >> 1));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    const uint64_t first_left  = AV_RN64(pixels    );\
    const uint64_t first_right = AV_RN64(pixels + 1);\
    /* lo*: sum of the low 2 bits of each byte; hi*: sum of the high 6 bits, pre-shifted */\
    /* lo0/hi0 always carry the rounding bias (2 per byte) */\
    uint64_t lo0 = (first_left  & 0x0303030303030303ULL)\
                 + (first_right & 0x0303030303030303ULL)\
                 + 0x0202020202020202ULL;\
    uint64_t hi0 = ((first_left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
                 + ((first_right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
    uint64_t lo1, hi1;\
\
    pixels += line_size;\
    for (row = 0; row < h; row += 2) {\
        uint64_t left  = AV_RN64(pixels    );\
        uint64_t right = AV_RN64(pixels + 1);\
        lo1 = (left  & 0x0303030303030303ULL)\
            + (right & 0x0303030303030303ULL);\
        hi1 = ((left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
            + ((right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
        OP(*((uint64_t*)block), hi0 + hi1 + (((lo0 + lo1) >> 2) & 0x0F0F0F0F0F0F0F0FULL));\
        pixels += line_size;\
        block  += line_size;\
        /* second row of the pair: refresh lo0/hi0 and reuse lo1/hi1 */\
        left  = AV_RN64(pixels    );\
        right = AV_RN64(pixels + 1);\
        lo0 = (left  & 0x0303030303030303ULL)\
            + (right & 0x0303030303030303ULL)\
            + 0x0202020202020202ULL;\
        hi0 = ((left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
            + ((right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
        OP(*((uint64_t*)block), hi0 + hi1 + (((lo0 + lo1) >> 2) & 0x0F0F0F0F0F0F0F0FULL));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    const uint64_t first_left  = AV_RN64(pixels    );\
    const uint64_t first_right = AV_RN64(pixels + 1);\
    /* same scheme as _pixels_xy2_c, with a bias of 1 per byte instead of 2 */\
    uint64_t lo0 = (first_left  & 0x0303030303030303ULL)\
                 + (first_right & 0x0303030303030303ULL)\
                 + 0x0101010101010101ULL;\
    uint64_t hi0 = ((first_left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
                 + ((first_right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
    uint64_t lo1, hi1;\
\
    pixels += line_size;\
    for (row = 0; row < h; row += 2) {\
        uint64_t left  = AV_RN64(pixels    );\
        uint64_t right = AV_RN64(pixels + 1);\
        lo1 = (left  & 0x0303030303030303ULL)\
            + (right & 0x0303030303030303ULL);\
        hi1 = ((left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
            + ((right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
        OP(*((uint64_t*)block), hi0 + hi1 + (((lo0 + lo1) >> 2) & 0x0F0F0F0F0F0F0F0FULL));\
        pixels += line_size;\
        block  += line_size;\
        left  = AV_RN64(pixels    );\
        right = AV_RN64(pixels + 1);\
        lo0 = (left  & 0x0303030303030303ULL)\
            + (right & 0x0303030303030303ULL)\
            + 0x0101010101010101ULL;\
        hi0 = ((left  & 0xFCFCFCFCFCFCFCFCULL) >> 2)\
            + ((right & 0xFCFCFCFCFCFCFCFCULL) >> 2);\
        OP(*((uint64_t*)block), hi0 + hi1 + (((lo0 + lo1) >> 2) & 0x0F0F0F0F0F0F0F0FULL));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels       , 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel))
|
|
|
|
|
|
|
|
|
/*
 * op_avg(a, b): store into `a` the per-byte average of `a` and `b`, rounded
 * up, treating both operands as eight packed 8-bit lanes.
 * (a|b) - (((a^b) & 0xFE..FE) >> 1) keeps each lane's borrow inside the lane
 * because the low bit of every lane is masked off before the shift.
 * `a` must be a modifiable lvalue; both arguments are evaluated more than once.
 */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
|
|
|
|
#else // 64 bit variant
|
|
|
|
|
|
|
|
|
|
#define PIXOP2(OPNAME, OP) \ |
|
|
|
|
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
|
|
|
int i;\
|
|
|
|
@ -749,7 +605,6 @@ CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pi |
|
|
|
|
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
|
|
|
|
|
|
|
|
|
|
/*
 * op_avg(a, b): store rnd_avg_pixel4(a, b) into `a`.
 * `a` must be a modifiable lvalue and is evaluated twice.
 * NOTE(review): rnd_avg_pixel4 is defined outside this view; by its name it is
 * presumably a rounding average over packed pixels -- confirm.
 */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
|
|
|
|
#endif |
|
|
|
|
/*
 * op_put(a, b): plain store of `b` into `a`.
 * `a` must be a modifiable lvalue.  Same (destination, value) shape as op_avg,
 * so either can be passed as the OP argument of PIXOP2.
 */
#define op_put(a, b) a = b
|
|
|
|
|
|
|
|
|
PIXOP2(avg, op_avg) |
|
|
|
|