avcodec/huffyuvdsp: add add_int16 AVX2 function

pull/272/head
Martin Vignali 7 years ago
parent 6955e8842e
commit e641c94190
  1. libavcodec/x86/huffyuvdsp.asm (5 changed lines)
  2. libavcodec/x86/huffyuvdsp_init.c (5 changed lines)
  3. libavcodec/x86/huffyuvdsp_template.asm (4 changed lines)

@@ -53,6 +53,11 @@ ADD_INT16
 INIT_XMM sse2
 ADD_INT16
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_INT16
+%endif
 ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
 ; intptr_t w, uint8_t *left)
 %macro LEFT_BGR32 0

@@ -28,6 +28,7 @@
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_avx2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
 intptr_t w, uint8_t *left);
@@ -53,4 +54,8 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, enum AVPixelFormat pix
 c->add_int16 = ff_add_int16_sse2;
 c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
 }
+if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+c->add_int16 = ff_add_int16_avx2;
+}
 }

@@ -21,8 +21,8 @@
 ;******************************************************************************
 %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-movd m4, maskd
-SPLATW m4, m4
+movd xm4, maskd
+SPLATW m4, xm4
 add wd, wd
 test wq, 2*mmsize - 1
 jz %%.tomainloop

Loading…
Cancel
Save