|
|
|
@ -158,7 +158,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
|
|
|
"movd %%mm4, %1 \n\t" |
|
|
|
|
|
|
|
|
|
: "=r" (numEq), "=r" (dcOk) |
|
|
|
|
: "r" (src), "r" ((long)stride), "m" (c->pQPb) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
|
|
|
|
: "%"REG_a |
|
|
|
|
); |
|
|
|
|
|
|
|
|
@ -303,7 +303,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
"sub %1, %0 \n\t" |
|
|
|
|
|
|
|
|
|
: |
|
|
|
|
: "r" (src), "r" ((long)stride), "m" (c->pQPb) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -423,7 +423,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) |
|
|
|
|
"movq %%mm2, (%%"REG_c", %1) \n\t" |
|
|
|
|
|
|
|
|
|
: |
|
|
|
|
: "r" (src), "r" ((long)stride) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -545,7 +545,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
"movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7
|
|
|
|
|
|
|
|
|
|
: |
|
|
|
|
: "r" (src), "r" ((long)stride), "m" (co->pQPb) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -810,7 +810,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
"movq %%mm2, (%0, %1, 4) \n\t" |
|
|
|
|
|
|
|
|
|
: |
|
|
|
|
: "r" (src), "r" ((long)stride), "m" (c->pQPb) |
|
|
|
|
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
|
|
|
|
@ -1098,7 +1098,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
"movq %%mm0, (%0, %1) \n\t" |
|
|
|
|
|
|
|
|
|
: "+r" (src) |
|
|
|
|
: "r" ((long)stride), "m" (c->pQPb) |
|
|
|
|
: "r" ((x86_reg)stride), "m" (c->pQPb) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1367,7 +1367,7 @@ DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5, |
|
|
|
|
DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) |
|
|
|
|
|
|
|
|
|
"1: \n\t" |
|
|
|
|
: : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2) |
|
|
|
|
: "%"REG_a, "%"REG_d, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1521,7 +1521,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid |
|
|
|
|
PAVGB(%%mm0, %%mm1) |
|
|
|
|
"movq %%mm1, (%%"REG_c", %1, 2) \n\t" |
|
|
|
|
|
|
|
|
|
: : "r" (src), "r" ((long)stride) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else |
|
|
|
@ -1591,7 +1591,7 @@ DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, |
|
|
|
|
DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) |
|
|
|
|
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) |
|
|
|
|
|
|
|
|
|
: : "r" (src), "r" ((long)stride) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_d, "%"REG_c |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1662,7 +1662,7 @@ DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) |
|
|
|
|
DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
|
|
|
|
|
"movq %%mm0, (%2) \n\t" |
|
|
|
|
: : "r" (src), "r" ((long)stride), "r"(tmp) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1752,7 +1752,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
|
|
|
|
|
"movq %%mm0, (%2) \n\t" |
|
|
|
|
"movq %%mm1, (%3) \n\t" |
|
|
|
|
: : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1840,7 +1840,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin |
|
|
|
|
"movq %%mm2, (%%"REG_d", %1, 2) \n\t" |
|
|
|
|
"movq %%mm1, (%2) \n\t" |
|
|
|
|
|
|
|
|
|
: : "r" (src), "r" ((long)stride), "r" (tmp) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride), "r" (tmp) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -1944,7 +1944,7 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
|
|
|
|
"movq %%mm2, (%%"REG_d", %1, 2) \n\t" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
: : "r" (src), "r" ((long)stride) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
|
|
|
|
@ -1986,7 +1986,7 @@ MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) |
|
|
|
|
MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1)) |
|
|
|
|
MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
|
|
|
|
|
|
|
|
|
: : "r" (src), "r" ((long)stride) |
|
|
|
|
: : "r" (src), "r" ((x86_reg)stride) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#endif //HAVE_MMX2
|
|
|
|
@ -2093,7 +2093,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src |
|
|
|
|
"movd %%mm1, 116(%3) \n\t" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
:: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2) |
|
|
|
|
:: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2) |
|
|
|
|
: "%"REG_a |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
@ -2173,7 +2173,7 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) |
|
|
|
|
"psrlq $32, %%mm1 \n\t" |
|
|
|
|
"movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" |
|
|
|
|
|
|
|
|
|
:: "r" (dst), "r" ((long)dstStride), "r" (src) |
|
|
|
|
:: "r" (dst), "r" ((x86_reg)dstStride), "r" (src) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
@ -2476,7 +2476,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) |
|
|
|
|
|
|
|
|
|
"4: \n\t" |
|
|
|
|
|
|
|
|
|
:: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast) |
|
|
|
|
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) |
|
|
|
|
: "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
|
|
|
|
); |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
@ -2676,14 +2676,14 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"movq %%mm6, %0 \n\t" |
|
|
|
|
|
|
|
|
|
: "=m" (eq_mask), "=m" (dc_mask) |
|
|
|
|
: "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) |
|
|
|
|
: "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) |
|
|
|
|
: "%"REG_a |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
both_masks = dc_mask & eq_mask; |
|
|
|
|
|
|
|
|
|
if(both_masks){ |
|
|
|
|
long offset= -8*step; |
|
|
|
|
x86_reg offset= -8*step; |
|
|
|
|
int64_t *temp_sums= sums; |
|
|
|
|
|
|
|
|
|
__asm__ volatile( |
|
|
|
@ -2820,7 +2820,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"mov %4, %0 \n\t" //FIXME
|
|
|
|
|
|
|
|
|
|
: "+&r"(src) |
|
|
|
|
: "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) |
|
|
|
|
: "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
src+= step; // src points to begin of the 8x8 Block
|
|
|
|
@ -2857,7 +2857,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
" js 1b \n\t" |
|
|
|
|
|
|
|
|
|
: "+r"(offset), "+r"(temp_sums) |
|
|
|
|
: "r" ((long)step), "r"(src - offset), "m"(both_masks) |
|
|
|
|
: "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks) |
|
|
|
|
); |
|
|
|
|
}else |
|
|
|
|
src+= step; // src points to begin of the 8x8 Block
|
|
|
|
@ -3092,7 +3092,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
"movq %%mm0, (%0, %1) \n\t" |
|
|
|
|
|
|
|
|
|
: "+r" (temp_src) |
|
|
|
|
: "r" ((long)step), "m" (c->pQPb), "m"(eq_mask) |
|
|
|
|
: "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask) |
|
|
|
|
: "%"REG_a, "%"REG_c |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
@ -3193,8 +3193,8 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) |
|
|
|
|
: "0" (packedOffsetAndScale), |
|
|
|
|
"r"(src), |
|
|
|
|
"r"(dst), |
|
|
|
|
"r" ((long)srcStride), |
|
|
|
|
"r" ((long)dstStride) |
|
|
|
|
"r" ((x86_reg)srcStride), |
|
|
|
|
"r" ((x86_reg)dstStride) |
|
|
|
|
: "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMX
|
|
|
|
@ -3226,8 +3226,8 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) |
|
|
|
|
|
|
|
|
|
: : "r" (src), |
|
|
|
|
"r" (dst), |
|
|
|
|
"r" ((long)srcStride), |
|
|
|
|
"r" ((long)dstStride) |
|
|
|
|
"r" ((x86_reg)srcStride), |
|
|
|
|
"r" ((x86_reg)dstStride) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
#else //HAVE_MMX
|
|
|
|
@ -3251,7 +3251,7 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride) |
|
|
|
|
"movq %%mm0, (%0, %1) \n\t" |
|
|
|
|
"movq %%mm0, (%0, %1, 2) \n\t" |
|
|
|
|
: "+r" (src) |
|
|
|
|
: "r" ((long)-stride) |
|
|
|
|
: "r" ((x86_reg)-stride) |
|
|
|
|
); |
|
|
|
|
#else |
|
|
|
|
int i; |
|
|
|
@ -3406,8 +3406,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
"add %3, %%"REG_d" \n\t" |
|
|
|
|
"prefetchnta 32(%%"REG_a", %0) \n\t" |
|
|
|
|
"prefetcht0 32(%%"REG_d", %2) \n\t" |
|
|
|
|
:: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), |
|
|
|
|
"g" ((long)x), "g" ((long)copyAhead) |
|
|
|
|
:: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
|
|
|
|
"g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
|
|
|
|
@ -3542,8 +3542,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
"add %3, %%"REG_d" \n\t" |
|
|
|
|
"prefetchnta 32(%%"REG_a", %0) \n\t" |
|
|
|
|
"prefetcht0 32(%%"REG_d", %2) \n\t" |
|
|
|
|
:: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), |
|
|
|
|
"g" ((long)x), "g" ((long)copyAhead) |
|
|
|
|
:: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
|
|
|
|
"g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
|
|
|
|
: "%"REG_a, "%"REG_d |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|