|
|
|
@ -30,32 +30,32 @@ |
|
|
|
|
#undef PMINUB |
|
|
|
|
#undef PMAXUB |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
|
|
|
|
#elif HAVE_AMD3DNOW |
|
|
|
|
#elif HAVE_AMD3DNOW_INLINE |
|
|
|
|
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
|
|
|
|
#endif |
|
|
|
|
#define PAVGB(a,b) REAL_PAVGB(a,b) |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" |
|
|
|
|
#elif HAVE_MMX |
|
|
|
|
#elif HAVE_MMX_INLINE |
|
|
|
|
#define PMINUB(b,a,t) \ |
|
|
|
|
"movq " #a ", " #t " \n\t"\
|
|
|
|
|
"psubusb " #b ", " #t " \n\t"\
|
|
|
|
|
"psubb " #t ", " #a " \n\t" |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" |
|
|
|
|
#elif HAVE_MMX |
|
|
|
|
#elif HAVE_MMX_INLINE |
|
|
|
|
#define PMAXUB(a,b) \ |
|
|
|
|
"psubusb " #a ", " #b " \n\t"\
|
|
|
|
|
"paddb " #a ", " #b " \n\t" |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
//FIXME? |255-0| = 1 (should not be a problem ...)
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
/**
|
|
|
|
|
* Check if the middle 8x8 Block in the given 8x16 block is flat |
|
|
|
|
*/ |
|
|
|
@ -135,7 +135,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
|
|
|
"psubusb %%mm3, %%mm4 \n\t" |
|
|
|
|
|
|
|
|
|
" \n\t" |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" |
|
|
|
|
"psadbw %%mm7, %%mm0 \n\t" |
|
|
|
|
#else |
|
|
|
@ -169,7 +169,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
|
|
|
return 2; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
|
|
|
@ -178,7 +178,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
|
|
|
#if !HAVE_ALTIVEC |
|
|
|
|
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= stride*3; |
|
|
|
|
__asm__ volatile( //"movv %0 %1 %2\n\t"
|
|
|
|
|
"movq %2, %%mm0 \n\t" // QP,..., QP
|
|
|
|
@ -305,7 +305,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
const int l1= stride; |
|
|
|
|
const int l2= stride + l1; |
|
|
|
|
const int l3= stride + l2; |
|
|
|
@ -344,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
|
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
#endif //HAVE_ALTIVEC
|
|
|
|
|
|
|
|
|
@ -357,7 +357,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= stride*3; |
|
|
|
|
|
|
|
|
|
__asm__ volatile( |
|
|
|
@ -443,7 +443,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
|
|
|
|
|
const int l1= stride; |
|
|
|
|
const int l2= stride + l1; |
|
|
|
@ -477,13 +477,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
} |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if !HAVE_ALTIVEC |
|
|
|
|
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
/*
|
|
|
|
|
uint8_t tmp[16]; |
|
|
|
|
const int l1= stride; |
|
|
|
@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
*/ |
|
|
|
|
#elif HAVE_MMX |
|
|
|
|
#elif HAVE_MMX_INLINE |
|
|
|
|
DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
|
|
|
|
|
src+= stride*4; |
|
|
|
|
__asm__ volatile( |
|
|
|
@ -872,7 +872,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
"movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
|
|
|
|
"movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"movq %%mm7, %%mm6 \n\t" // 0
|
|
|
|
|
"psubw %%mm0, %%mm6 \n\t" |
|
|
|
|
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
|
|
|
|
@ -904,7 +904,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pminsw %%mm2, %%mm0 \n\t" |
|
|
|
|
"pminsw %%mm3, %%mm1 \n\t" |
|
|
|
|
#else |
|
|
|
@ -968,7 +968,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
"pand %%mm2, %%mm4 \n\t" |
|
|
|
|
"pand %%mm3, %%mm5 \n\t" |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pminsw %%mm0, %%mm4 \n\t" |
|
|
|
|
"pminsw %%mm1, %%mm5 \n\t" |
|
|
|
|
#else |
|
|
|
@ -995,7 +995,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) |
|
|
|
|
: "%"REG_a |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
const int l1= stride; |
|
|
|
|
const int l2= stride + l1; |
|
|
|
|
const int l3= stride + l2; |
|
|
|
@ -1033,14 +1033,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
} |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
#endif //HAVE_ALTIVEC
|
|
|
|
|
|
|
|
|
|
#if !HAVE_ALTIVEC |
|
|
|
|
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
DECLARE_ALIGNED(8, uint64_t, tmp)[3]; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"pxor %%mm6, %%mm6 \n\t" |
|
|
|
@ -1060,7 +1060,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
|
|
|
|
|
|
|
|
|
#undef REAL_FIND_MIN_MAX |
|
|
|
|
#undef FIND_MIN_MAX |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
#define REAL_FIND_MIN_MAX(addr)\ |
|
|
|
|
"movq " #addr ", %%mm0 \n\t"\
|
|
|
|
|
"pminub %%mm0, %%mm7 \n\t"\
|
|
|
|
@ -1087,7 +1087,7 @@ FIND_MIN_MAX((%0, %1, 8)) |
|
|
|
|
|
|
|
|
|
"movq %%mm7, %%mm4 \n\t" |
|
|
|
|
"psrlq $8, %%mm7 \n\t" |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pminub %%mm4, %%mm7 \n\t" // min of pixels
|
|
|
|
|
"pshufw $0xF9, %%mm7, %%mm4 \n\t" |
|
|
|
|
"pminub %%mm4, %%mm7 \n\t" // min of pixels
|
|
|
|
@ -1112,7 +1112,7 @@ FIND_MIN_MAX((%0, %1, 8)) |
|
|
|
|
|
|
|
|
|
"movq %%mm6, %%mm4 \n\t" |
|
|
|
|
"psrlq $8, %%mm6 \n\t" |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pmaxub %%mm4, %%mm6 \n\t" // max of pixels
|
|
|
|
|
"pshufw $0xF9, %%mm6, %%mm4 \n\t" |
|
|
|
|
"pmaxub %%mm4, %%mm6 \n\t" |
|
|
|
@ -1266,7 +1266,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
int y; |
|
|
|
|
int min=255; |
|
|
|
|
int max=0; |
|
|
|
@ -1383,7 +1383,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
|
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
#endif //HAVE_ALTIVEC
|
|
|
|
|
|
|
|
|
@ -1395,7 +1395,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= 4*stride; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
@ -1448,7 +1448,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= stride*3; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
@ -1490,7 +1490,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_d, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
@ -1500,7 +1500,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
|
|
|
|
src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -1512,7 +1512,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= stride*4; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
@ -1561,7 +1561,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*4; |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
@ -1579,7 +1579,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
|
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -1591,7 +1591,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= stride*4; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
@ -1651,7 +1651,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*4; |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
@ -1680,7 +1680,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
|
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -1692,7 +1692,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
src+= 4*stride; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
@ -1739,7 +1739,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r" (tmp) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
int a, b, c, x; |
|
|
|
|
src+= 4*stride; |
|
|
|
|
|
|
|
|
@ -1782,7 +1782,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
|
|
|
|
src += 4; |
|
|
|
|
tmp += 4; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -1793,9 +1793,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
src+= 4*stride; |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0, %1), %%"REG_a" \n\t" |
|
|
|
|
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
|
|
|
@ -1885,8 +1885,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#endif //HAVE_MMXEXT
|
|
|
|
|
#else //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE
|
|
|
|
|
#else //HAVE_MMX_INLINE
|
|
|
|
|
int x, y; |
|
|
|
|
src+= 4*stride; |
|
|
|
|
// FIXME - there should be a way to do a few columns in parallel like w/mmx
|
|
|
|
@ -1905,10 +1905,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
|
|
|
|
} |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
/**
|
|
|
|
|
* Transpose and shift the given 8x8 Block into dst1 and dst2. |
|
|
|
|
*/ |
|
|
|
@ -2073,7 +2073,7 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
//static long test=0;
|
|
|
|
|
|
|
|
|
|
#if !HAVE_ALTIVEC |
|
|
|
@ -2087,7 +2087,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
|
|
|
|
|
|
|
|
|
#define FAST_L2_DIFF |
|
|
|
|
//#define L1_DIFF //you should change the thresholds too if you try that one
|
|
|
|
|
#if HAVE_MMXEXT || HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
|
|
|
|
|
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
|
|
|
|
@ -2375,7 +2375,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) |
|
|
|
|
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) |
|
|
|
|
: "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
{ |
|
|
|
|
int y; |
|
|
|
|
int d=0; |
|
|
|
@ -2458,11 +2458,11 @@ Switch between |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
|
|
|
|
} |
|
|
|
|
#endif //HAVE_ALTIVEC
|
|
|
|
|
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
/**
|
|
|
|
|
* accurate deblock filter |
|
|
|
|
*/ |
|
|
|
@ -2865,7 +2865,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
|
|
|
|
"movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"movq %%mm7, %%mm6 \n\t" // 0
|
|
|
|
|
"psubw %%mm0, %%mm6 \n\t" |
|
|
|
|
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
|
|
|
|
@ -2897,7 +2897,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pminsw %%mm2, %%mm0 \n\t" |
|
|
|
|
"pminsw %%mm3, %%mm1 \n\t" |
|
|
|
|
#else |
|
|
|
@ -2961,7 +2961,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"pand %%mm2, %%mm4 \n\t" |
|
|
|
|
"pand %%mm3, %%mm5 \n\t" |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
"pminsw %%mm0, %%mm4 \n\t" |
|
|
|
|
"pminsw %%mm1, %%mm5 \n\t" |
|
|
|
|
#else |
|
|
|
@ -2998,7 +2998,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
} |
|
|
|
|
} */ |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
|
|
|
|
|
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
|
|
|
|
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); |
|
|
|
@ -3013,18 +3013,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, |
|
|
|
|
int levelFix, int64_t *packedOffsetAndScale) |
|
|
|
|
{ |
|
|
|
|
#if !HAVE_MMX |
|
|
|
|
#if !HAVE_MMX_INLINE |
|
|
|
|
int i; |
|
|
|
|
#endif |
|
|
|
|
if(levelFix){ |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
|
|
|
|
|
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
|
|
|
|
|
"lea (%2,%4), %%"REG_a" \n\t" |
|
|
|
|
"lea (%3,%5), %%"REG_d" \n\t" |
|
|
|
|
"pxor %%mm4, %%mm4 \n\t" |
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ |
|
|
|
|
"movq " #src1 ", %%mm0 \n\t"\
|
|
|
|
|
"movq " #src1 ", %%mm5 \n\t"\
|
|
|
|
@ -3047,7 +3047,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
|
|
|
|
"movq %%mm0, " #dst1 " \n\t"\
|
|
|
|
|
"movq %%mm1, " #dst2 " \n\t"\
|
|
|
|
|
|
|
|
|
|
#else //HAVE_MMXEXT
|
|
|
|
|
#else //HAVE_MMXEXT_INLINE
|
|
|
|
|
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ |
|
|
|
|
"movq " #src1 ", %%mm0 \n\t"\
|
|
|
|
|
"movq " #src1 ", %%mm5 \n\t"\
|
|
|
|
@ -3074,7 +3074,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
|
|
|
|
"movq %%mm0, " #dst1 " \n\t"\
|
|
|
|
|
"movq %%mm1, " #dst2 " \n\t"\
|
|
|
|
|
|
|
|
|
|
#endif //HAVE_MMXEXT
|
|
|
|
|
#endif //HAVE_MMXEXT_INLINE
|
|
|
|
|
#define SCALED_CPY(src1, src2, dst1, dst2)\ |
|
|
|
|
REAL_SCALED_CPY(src1, src2, dst1, dst2) |
|
|
|
|
|
|
|
|
@ -3094,13 +3094,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) |
|
|
|
|
"r" ((x86_reg)dstStride) |
|
|
|
|
: "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMX
|
|
|
|
|
#else //HAVE_MMX_INLINE
|
|
|
|
|
for(i=0; i<8; i++) |
|
|
|
|
memcpy( &(dst[dstStride*i]), |
|
|
|
|
&(src[srcStride*i]), BLOCK_SIZE); |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
}else{ |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"lea (%0,%2), %%"REG_a" \n\t" |
|
|
|
|
"lea (%1,%3), %%"REG_d" \n\t" |
|
|
|
@ -3127,11 +3127,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) |
|
|
|
|
"r" ((x86_reg)dstStride) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMX
|
|
|
|
|
#else //HAVE_MMX_INLINE
|
|
|
|
|
for(i=0; i<8; i++) |
|
|
|
|
memcpy( &(dst[dstStride*i]), |
|
|
|
|
&(src[srcStride*i]), BLOCK_SIZE); |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -3140,7 +3140,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) |
|
|
|
|
*/ |
|
|
|
|
static inline void RENAME(duplicate)(uint8_t src[], int stride) |
|
|
|
|
{ |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq (%0), %%mm0 \n\t" |
|
|
|
|
"add %1, %0 \n\t" |
|
|
|
@ -3177,7 +3177,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
int QPCorrecture= 256*256; |
|
|
|
|
|
|
|
|
|
int copyAhead; |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
int i; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
@ -3190,7 +3190,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; |
|
|
|
|
//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
|
|
|
|
|
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
for(i=0; i<57; i++){ |
|
|
|
|
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; |
|
|
|
|
int threshold= offset*2 + 1; |
|
|
|
@ -3248,7 +3248,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
|
|
|
|
|
scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
c.packedYScale= (uint16_t)(scale*256.0 + 0.5); |
|
|
|
|
c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; |
|
|
|
|
#else |
|
|
|
@ -3281,7 +3281,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE){ |
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
/*
|
|
|
|
|
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
|
|
|
|
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
|
|
|
@ -3308,7 +3308,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
#elif HAVE_AMD3DNOW |
|
|
|
|
#elif HAVE_AMD3DNOW_INLINE |
|
|
|
|
//FIXME check if this is faster on a 3DNow! chip or if it is faster without the prefetch or ...
|
|
|
|
|
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
|
|
|
|
|
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); |
|
|
|
@ -3354,7 +3354,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
//1% speedup if these are here instead of the inner loop
|
|
|
|
|
const uint8_t *srcBlock= &(src[y*srcStride]); |
|
|
|
|
uint8_t *dstBlock= &(dst[y*dstStride]); |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
uint8_t *tempBlock1= c.tempBlocks; |
|
|
|
|
uint8_t *tempBlock2= c.tempBlocks + 8; |
|
|
|
|
#endif |
|
|
|
@ -3390,7 +3390,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE){ |
|
|
|
|
const int stride= dstStride; |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
uint8_t *tmpXchg; |
|
|
|
|
#endif |
|
|
|
|
if(isColor){ |
|
|
|
@ -3404,7 +3404,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
yHistogram[ srcBlock[srcStride*12 + 4] ]++; |
|
|
|
|
} |
|
|
|
|
c.QP= QP; |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movd %1, %%mm7 \n\t" |
|
|
|
|
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
|
|
|
@ -3417,7 +3417,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if HAVE_MMXEXT |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
/*
|
|
|
|
|
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
|
|
|
|
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
|
|
|
@ -3444,7 +3444,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
#elif HAVE_AMD3DNOW |
|
|
|
|
#elif HAVE_AMD3DNOW_INLINE |
|
|
|
|
//FIXME check if this is faster on a 3DNow! chip or if it is faster without the prefetch or ...
|
|
|
|
|
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
|
|
|
|
|
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); |
|
|
|
@ -3488,12 +3488,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); |
|
|
|
|
#endif |
|
|
|
|
/* check if we have a previous block to deblock it with dstBlock */ |
|
|
|
|
if(x - 8 >= 0){ |
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
if(mode & H_X1_FILTER) |
|
|
|
|
RENAME(vertX1Filter)(tempBlock1, 16, &c); |
|
|
|
|
else if(mode & H_DEBLOCK){ |
|
|
|
@ -3539,7 +3539,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
}else if(mode & H_A_DEBLOCK){ |
|
|
|
|
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
#endif //HAVE_MMX_INLINE
|
|
|
|
|
if(mode & DERING){ |
|
|
|
|
//FIXME filter first line
|
|
|
|
|
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); |
|
|
|
@ -3557,7 +3557,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
dstBlock+=8; |
|
|
|
|
srcBlock+=8; |
|
|
|
|
|
|
|
|
|
#if HAVE_MMX |
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
tmpXchg= tempBlock1; |
|
|
|
|
tempBlock1= tempBlock2; |
|
|
|
|
tempBlock2 = tmpXchg; |
|
|
|
@ -3597,9 +3597,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; |
|
|
|
|
}*/ |
|
|
|
|
} |
|
|
|
|
#if HAVE_AMD3DNOW |
|
|
|
|
#if HAVE_AMD3DNOW_INLINE |
|
|
|
|
__asm__ volatile("femms"); |
|
|
|
|
#elif HAVE_MMX |
|
|
|
|
#elif HAVE_MMX_INLINE |
|
|
|
|
__asm__ volatile("emms"); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|