|
|
@ -28,7 +28,6 @@ |
|
|
|
#include "libavcodec/mpegvideo.h" |
|
|
|
#include "libavcodec/mpegvideo.h" |
|
|
|
#include "libavcodec/simple_idct.h" |
|
|
|
#include "libavcodec/simple_idct.h" |
|
|
|
#include "dsputil_mmx.h" |
|
|
|
#include "dsputil_mmx.h" |
|
|
|
#include "mmx.h" |
|
|
|
|
|
|
|
#include "vp3dsp_mmx.h" |
|
|
|
#include "vp3dsp_mmx.h" |
|
|
|
#include "vp3dsp_sse2.h" |
|
|
|
#include "vp3dsp_sse2.h" |
|
|
|
#include "vp6dsp_mmx.h" |
|
|
|
#include "vp6dsp_mmx.h" |
|
|
@ -273,22 +272,41 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size |
|
|
|
:"memory"); |
|
|
|
:"memory"); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = |
|
|
|
DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) = |
|
|
|
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; |
|
|
|
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define put_signed_pixels_clamped_mmx_half(off) \ |
|
|
|
|
|
|
|
"movq "#off"(%2), %%mm1 \n\t"\
|
|
|
|
|
|
|
|
"movq 16+"#off"(%2), %%mm2 \n\t"\
|
|
|
|
|
|
|
|
"movq 32+"#off"(%2), %%mm3 \n\t"\
|
|
|
|
|
|
|
|
"movq 48+"#off"(%2), %%mm4 \n\t"\
|
|
|
|
|
|
|
|
"packsswb 8+"#off"(%2), %%mm1 \n\t"\
|
|
|
|
|
|
|
|
"packsswb 24+"#off"(%2), %%mm2 \n\t"\
|
|
|
|
|
|
|
|
"packsswb 40+"#off"(%2), %%mm3 \n\t"\
|
|
|
|
|
|
|
|
"packsswb 56+"#off"(%2), %%mm4 \n\t"\
|
|
|
|
|
|
|
|
"paddb %%mm0, %%mm1 \n\t"\
|
|
|
|
|
|
|
|
"paddb %%mm0, %%mm2 \n\t"\
|
|
|
|
|
|
|
|
"paddb %%mm0, %%mm3 \n\t"\
|
|
|
|
|
|
|
|
"paddb %%mm0, %%mm4 \n\t"\
|
|
|
|
|
|
|
|
"movq %%mm1, (%0) \n\t"\
|
|
|
|
|
|
|
|
"movq %%mm2, (%0, %3) \n\t"\
|
|
|
|
|
|
|
|
"movq %%mm3, (%0, %3, 2) \n\t"\
|
|
|
|
|
|
|
|
"movq %%mm4, (%0, %1) \n\t" |
|
|
|
|
|
|
|
|
|
|
|
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
|
|
|
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int i; |
|
|
|
x86_reg line_skip = line_size; |
|
|
|
|
|
|
|
x86_reg av_uninit(line_skip3); |
|
|
|
movq_m2r(*vector128, mm1); |
|
|
|
|
|
|
|
for (i = 0; i < 8; i++) { |
|
|
|
__asm__ volatile ( |
|
|
|
movq_m2r(*(block), mm0); |
|
|
|
"movq "MANGLE(ff_vector128)", %%mm0 \n\t" |
|
|
|
packsswb_m2r(*(block + 4), mm0); |
|
|
|
"lea (%3, %3, 2), %1 \n\t" |
|
|
|
block += 8; |
|
|
|
put_signed_pixels_clamped_mmx_half(0) |
|
|
|
paddb_r2r(mm1, mm0); |
|
|
|
"lea (%0, %3, 4), %0 \n\t" |
|
|
|
movq_r2m(mm0, *pixels); |
|
|
|
put_signed_pixels_clamped_mmx_half(64) |
|
|
|
pixels += line_size; |
|
|
|
:"+r" (pixels), "+r" (line_skip3) |
|
|
|
} |
|
|
|
:"r" (block), "r"(line_skip) |
|
|
|
|
|
|
|
:"memory"); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
|
|
|
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
|
|
|