|
|
|
@ -172,61 +172,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, |
|
|
|
|
} while (--i); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#define CLEAR_BLOCKS(name, n) \ |
|
|
|
|
void name(int16_t *blocks) \
|
|
|
|
|
{ \
|
|
|
|
|
__asm__ volatile ( \
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t" \
|
|
|
|
|
"mov $-"#n", %%"REG_a" \n\t" \
|
|
|
|
|
"1: \n\t" \
|
|
|
|
|
"movq %%mm7, (%0, %%"REG_a") \n\t" \
|
|
|
|
|
"movq %%mm7, 8(%0, %%"REG_a") \n\t" \
|
|
|
|
|
"movq %%mm7, 16(%0, %%"REG_a") \n\t" \
|
|
|
|
|
"movq %%mm7, 24(%0, %%"REG_a") \n\t" \
|
|
|
|
|
"add $32, %%"REG_a" \n\t" \
|
|
|
|
|
"js 1b \n\t" \
|
|
|
|
|
:: "r"(((uint8_t *) blocks) + n) \
|
|
|
|
|
: "%"REG_a); \
|
|
|
|
|
} |
|
|
|
|
CLEAR_BLOCKS(ff_clear_blocks_mmx, 768) |
|
|
|
|
CLEAR_BLOCKS(ff_clear_block_mmx, 128) |
|
|
|
|
|
|
|
|
|
/**
 * Zero-fill 128 bytes (64 int16_t values) starting at `block` using eight
 * 16-byte SSE stores.
 *
 * The stores are `movaps`, which faults on addresses that are not 16-byte
 * aligned, so `block` must be 16-byte aligned. xmm0 is overwritten.
 *
 * @param block destination buffer, at least 128 bytes, 16-byte aligned
 */
void ff_clear_block_sse(int16_t *block)
{
    __asm__ volatile (
        /* xmm0 = 0 */
        "xorps  %%xmm0, %%xmm0          \n\t"
        /* eight unrolled aligned stores covering bytes 0..127 */
        "movaps %%xmm0,    (%[blk])     \n\t"
        "movaps %%xmm0,  16(%[blk])     \n\t"
        "movaps %%xmm0,  32(%[blk])     \n\t"
        "movaps %%xmm0,  48(%[blk])     \n\t"
        "movaps %%xmm0,  64(%[blk])     \n\t"
        "movaps %%xmm0,  80(%[blk])     \n\t"
        "movaps %%xmm0,  96(%[blk])     \n\t"
        "movaps %%xmm0, 112(%[blk])     \n\t"
        : /* no outputs */
        : [blk] "r" (block)
        : "memory"
    );
}
|
|
|
|
|
|
|
|
|
void ff_clear_blocks_sse(int16_t *blocks) |
|
|
|
|
{ |
|
|
|
|
__asm__ volatile ( |
|
|
|
|
"xorps %%xmm0, %%xmm0 \n" |
|
|
|
|
"mov $-768, %%"REG_a" \n" |
|
|
|
|
"1: \n" |
|
|
|
|
"movaps %%xmm0, (%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 16(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 32(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 48(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 64(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 80(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 96(%0, %%"REG_a") \n" |
|
|
|
|
"movaps %%xmm0, 112(%0, %%"REG_a") \n" |
|
|
|
|
"add $128, %%"REG_a" \n" |
|
|
|
|
"js 1b \n" |
|
|
|
|
:: "r"(((uint8_t *) blocks) + 128 * 6) |
|
|
|
|
: "%"REG_a); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w) |
|
|
|
|
{ |
|
|
|
|
x86_reg i = 0; |
|
|
|
|