|
|
|
@ -18,16 +18,32 @@ |
|
|
|
|
|
|
|
|
|
#include "config.h" |
|
|
|
|
#include "libavutil/attributes.h" |
|
|
|
|
#include "libavutil/intreadwrite.h" |
|
|
|
|
#include "huffyuvencdsp.h" |
|
|
|
|
#include "mathops.h" |
|
|
|
|
|
|
|
|
|
// Select the word type used for the multi-sample arithmetic in this file.
// When HAVE_FAST_64BIT is set, BITS is 64 and uint_native is uint64_t;
// otherwise BITS is 32 and uint_native is uint32_t.
#if HAVE_FAST_64BIT |
|
|
|
|
#define BITS 64 |
|
|
|
|
typedef uint64_t uint_native; |
|
|
|
|
#else |
|
|
|
|
#define BITS 32 |
|
|
|
|
typedef uint32_t uint_native; |
|
|
|
|
#endif |
|
|
|
|
// Word-sized memory access helpers whose names are built from BITS.
// NOTE(review): AV_JOIN is defined outside this file; presumably it
// token-pastes its arguments, so RN becomes AV_RN32/AV_RN64, RNA becomes
// AV_RN32A/AV_RN64A and WNA becomes AV_WN32A/AV_WN64A -- confirm against
// libavutil. The trailing "A" variants are presumably the aligned-access
// forms from intreadwrite.h -- verify before relying on that.
#define RN AV_JOIN(AV_RN, BITS) |
|
|
|
|
#define RNA AV_JOIN(AV_JOIN(AV_RN, BITS), A) |
|
|
|
|
#define WNA AV_JOIN(AV_JOIN(AV_WN, BITS), A) |
|
|
|
|
|
|
|
|
|
// Byte-replicated constants. ~(uint_native)0 / 255 has 0x01 in every byte,
// so pb_7f is 0x7f7f7f7f when uint_native is 32 bits wide and
// 0x7f7f7f7f7f7f7f7f when it is 64 bits wide; pb_80 repeats 0x80 in every
// byte in the same way.
|
|
|
|
|
#define pb_7f (~(uint_native)0 / 255 * 0x7f) |
|
|
|
|
#define pb_80 (~(uint_native)0 / 255 * 0x80) |
|
|
|
|
|
|
|
|
|
// 16-bit-lane constant holding 0x0001 in every lane: an all-ones value
// divided by UINT16_MAX gives 0x00010001 for a 32-bit type and
// 0x0001000100010001 for a 64-bit type.
// NOTE(review): pw_1 is defined twice below, once from ULONG_MAX and once
// from (uint_native)-1. This looks like a patch view showing the old and the
// new definition together -- confirm which one is live in the actual file.
|
|
|
|
|
#define pw_1 (ULONG_MAX / UINT16_MAX) |
|
|
|
|
#define pw_1 ((uint_native)-1 / UINT16_MAX) |
|
|
|
|
|
|
|
|
|
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ |
|
|
|
|
long i; |
|
|
|
|
#if !HAVE_FAST_UNALIGNED |
|
|
|
|
if((long)src2 & (sizeof(long)-1)){ |
|
|
|
|
if ((uintptr_t)src2 & (sizeof(uint_native) - 1)) { |
|
|
|
|
for(i=0; i+3<w; i+=4){ |
|
|
|
|
dst[i+0] = (src1[i+0]-src2[i+0]) & mask; |
|
|
|
|
dst[i+1] = (src1[i+1]-src2[i+1]) & mask; |
|
|
|
@ -37,13 +53,13 @@ static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *sr |
|
|
|
|
}else |
|
|
|
|
#endif |
|
|
|
|
{ |
|
|
|
|
unsigned long pw_lsb = (mask >> 1) * pw_1; |
|
|
|
|
unsigned long pw_msb = pw_lsb + pw_1; |
|
|
|
|
uint_native pw_lsb = (mask >> 1) * pw_1; |
|
|
|
|
uint_native pw_msb = pw_lsb + pw_1; |
|
|
|
|
|
|
|
|
|
for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { |
|
|
|
|
long a = *(long*)(src1+i); |
|
|
|
|
long b = *(long*)(src2+i); |
|
|
|
|
*(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); |
|
|
|
|
for (i = 0; i <= w - (int)sizeof(uint_native)/2; i += sizeof(uint_native)/2) { |
|
|
|
|
uint_native a = RNA(src1 + i); |
|
|
|
|
uint_native b = RN (src2 + i); |
|
|
|
|
WNA(dst + i, ((a | pw_msb) - (b & pw_lsb)) ^ ((a^b^pw_msb) & pw_msb)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
for (; i<w; i++) |
|
|
|
|