|
|
|
@ -21,14 +21,12 @@ |
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
/* LD(adr): casts adr to uint32_t* and dereferences it, i.e. accesses the
 * 32-bit word located at adr.
 * NOTE(review): the plain pointer cast presumably requires adr to be
 * suitably aligned for uint32_t on strict-alignment targets -- confirm
 * against the callers. */
#define LD(adr) *(uint32_t*)(adr) |
|
|
|
|
|
|
|
|
|
#define PIXOP2(OPNAME, OP) \ |
|
|
|
|
/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -38,8 +36,8 @@ |
|
|
|
|
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -49,7 +47,7 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons |
|
|
|
|
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -59,10 +57,10 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons |
|
|
|
|
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -72,10 +70,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *sr |
|
|
|
|
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -95,7 +93,7 @@ static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr |
|
|
|
|
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -105,10 +103,10 @@ static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s |
|
|
|
|
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -118,10 +116,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui |
|
|
|
|
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
|
|
|
|
|
OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -131,8 +129,8 @@ static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t * |
|
|
|
|
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do { /* only src2 aligned */\
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -142,8 +140,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin |
|
|
|
|
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
|
|
|
|
|
{\
|
|
|
|
|
do {\
|
|
|
|
|
OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
|
|
|
|
|
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
|
|
|
|
|
src1+=src_stride1; \
|
|
|
|
|
src2+=src_stride2; \
|
|
|
|
|
dst+=dst_stride; \
|
|
|
|
@ -247,10 +245,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint |
|
|
|
|
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
|
|
|
do { \
|
|
|
|
|
uint32_t a0,a1,a2,a3; /* only src1 is unaligned */\
|
|
|
|
|
UNPACK(a0,a1,LD32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3),LP(src4)); \
|
|
|
|
|
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
|
|
|
|
|
OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
src1+=src_stride1;\
|
|
|
|
@ -264,10 +262,10 @@ static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s |
|
|
|
|
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
|
|
|
do { \
|
|
|
|
|
uint32_t a0,a1,a2,a3; \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3),LP(src4)); \
|
|
|
|
|
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
src1+=src_stride1;\
|
|
|
|
@ -327,16 +325,16 @@ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin |
|
|
|
|
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
|
|
|
do { /* src1 is unaligned */\
|
|
|
|
|
uint32_t a0,a1,a2,a3; \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3),LP(src4)); \
|
|
|
|
|
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
|
|
|
|
|
OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
|
|
|
|
|
OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
|
|
|
|
|
OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
src1+=src_stride1;\
|
|
|
|
@ -350,16 +348,16 @@ static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t * |
|
|
|
|
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
|
|
|
do { \
|
|
|
|
|
uint32_t a0,a1,a2,a3; \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3),LP(src4)); \
|
|
|
|
|
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
|
|
|
|
|
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
|
|
|
|
|
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
|
|
|
|
|
UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
|
|
|
|
|
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
|
|
|
|
|
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
|
|
|
|
|
src1+=src_stride1;\
|
|
|
|
|