|
|
@ -333,18 +333,18 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){ |
|
|
|
"movq 12(%1), %%mm6 \n\t" |
|
|
|
"movq 12(%1), %%mm6 \n\t" |
|
|
|
"paddd (%1), %%mm2 \n\t" |
|
|
|
"paddd (%1), %%mm2 \n\t" |
|
|
|
"paddd 8(%1), %%mm6 \n\t" |
|
|
|
"paddd 8(%1), %%mm6 \n\t" |
|
|
|
"movq %%mm2, %%mm0 \n\t" |
|
|
|
"pxor %%mm0, %%mm0 \n\t" //note: the 2 xor could be avoided if we would flip the rounding direction
|
|
|
|
"movq %%mm6, %%mm4 \n\t" |
|
|
|
"pxor %%mm4, %%mm4 \n\t" |
|
|
|
"pslld $2, %%mm2 \n\t" |
|
|
|
|
|
|
|
"pslld $2, %%mm6 \n\t" |
|
|
|
|
|
|
|
"psubd %%mm2, %%mm0 \n\t" |
|
|
|
"psubd %%mm2, %%mm0 \n\t" |
|
|
|
"psubd %%mm6, %%mm4 \n\t" |
|
|
|
"psubd %%mm6, %%mm4 \n\t" |
|
|
|
"psrad $1, %%mm0 \n\t" |
|
|
|
"psrad $1, %%mm0 \n\t" |
|
|
|
"psrad $1, %%mm4 \n\t" |
|
|
|
"psrad $1, %%mm4 \n\t" |
|
|
|
"movq (%0), %%mm2 \n\t" |
|
|
|
|
|
|
|
"movq 8(%0), %%mm6 \n\t" |
|
|
|
|
|
|
|
"psubd %%mm0, %%mm2 \n\t" |
|
|
|
"psubd %%mm0, %%mm2 \n\t" |
|
|
|
"psubd %%mm4, %%mm6 \n\t" |
|
|
|
"psubd %%mm4, %%mm6 \n\t" |
|
|
|
|
|
|
|
"movq (%0), %%mm0 \n\t" |
|
|
|
|
|
|
|
"movq 8(%0), %%mm4 \n\t" |
|
|
|
|
|
|
|
"paddd %%mm0, %%mm2 \n\t" |
|
|
|
|
|
|
|
"paddd %%mm4, %%mm6 \n\t" |
|
|
|
"movq %%mm2, (%2) \n\t" |
|
|
|
"movq %%mm2, (%2) \n\t" |
|
|
|
"movq %%mm6, 8(%2) \n\t" |
|
|
|
"movq %%mm6, 8(%2) \n\t" |
|
|
|
:: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i]) |
|
|
|
:: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i]) |
|
|
|