x86/dsputilenc: implement SSE2 version of diff_pixels

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/72/head
James Almer 11 years ago committed by Michael Niedermayer
parent a0c5cd3475
commit e64e079ece
  1. 25
      libavcodec/x86/dsputilenc.asm
  2. 3
      libavcodec/x86/dsputilenc_mmx.c

@ -419,6 +419,31 @@ cglobal diff_pixels, 4,5
jne .loop
REP_RET
;-----------------------------------------------------------------------
; void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1,
;                          const uint8_t *s2, int stride)
;
; Writes the 16-bit differences s1[x] - s2[x] of an 8x8 byte area into
; block. Each loop pass handles two rows (2 * 16 output bytes), so the
; 128-byte destination is filled in four passes.
;
; In:    blockq  = destination; written with mova (aligned store), so it
;                  is expected to be 16-byte aligned
;        s1q/s2q = source rows, 8 bytes read per row
;        strideq = byte distance between rows (sign-extended from int)
; Temps: cntq    = negative byte offset into block, counts -128 -> 0
;        m4      = all-zero register used to widen bytes to words
;-----------------------------------------------------------------------
INIT_XMM sse2
cglobal diff_pixels, 4, 5, 5, block, s1, s2, stride, cnt
    movsxdifnidn strideq, strided
    pxor         m4, m4
    mov          cntq, -128             ; offset of first row relative to block end
    add          blockq, 128            ; blockq = one past the end of the output
.loop:
    ; even row: widen 8 bytes of each source to words, then subtract
    movh         m0, [s1q]
    movh         m2, [s2q]
    punpcklbw    m0, m4
    punpcklbw    m2, m4
    psubw        m0, m2                 ; m0 = s1 row - s2 row

    ; odd row: same, one stride further down
    movh         m1, [s1q+strideq]
    movh         m3, [s2q+strideq]
    punpcklbw    m1, m4
    punpcklbw    m3, m4
    psubw        m1, m3                 ; m1 = s1 row - s2 row

    ; both rows are stored only after all loads of this pass are done
    mova         [blockq+cntq+0 ], m0
    mova         [blockq+cntq+16], m1

    lea          s1q, [s1q+strideq*2]   ; advance both sources by two rows
    lea          s2q, [s2q+strideq*2]   ; (lea leaves flags untouched)
    add          cntq, 32
    jne          .loop                  ; cntq reaches 0 after the last pair of rows
    RET
INIT_MMX mmx
; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
cglobal pix_sum16, 2, 3

@ -36,6 +36,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
/* diff_pixels: the MMX and SSE2 variants share one signature. The SSE2
 * assembly widens 8 bytes per row from s1 and s2 to 16 bits, stores
 * s1 - s2 into block, and steps both sources by stride from row to row. */
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
int stride);
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
int stride);
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
int ff_sum_abs_dctelem_mmx(int16_t *block);
@ -971,6 +973,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
if (EXTERNAL_SSE2(cpu_flags)) {
c->sse[0] = ff_sse16_sse2;
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
c->diff_pixels = ff_diff_pixels_sse2;
#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;

Loading…
Cancel
Save