diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h index cecf0fe1e5..ccab39bac6 100644 --- a/libavcodec/lossless_videodsp.h +++ b/libavcodec/lossless_videodsp.h @@ -29,7 +29,7 @@ #include "libavutil/cpu.h" typedef struct LLVidDSPContext { - void (*add_bytes)(uint8_t *dst /* align 16 */, uint8_t *src /* align 16 */, + void (*add_bytes)(uint8_t *dst /* align 32 */, uint8_t *src /* align 32 */, ptrdiff_t w); void (*add_median_pred)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, ptrdiff_t w, diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index 443fe02951..70ed555737 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -217,6 +217,11 @@ ADD_BYTES INIT_XMM sse2 ADD_BYTES +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +ADD_BYTES +%endif + %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u) add wd, wd add srcq, wq diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 21bbd12bd2..4f20c1ce92 100644 --- a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -25,6 +25,7 @@ void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w); +void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, ptrdiff_t w, @@ -115,4 +116,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c) if (EXTERNAL_SSE4(cpu_flags)) { c->add_left_pred_int16 = ff_add_left_pred_int16_sse4; } + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + c->add_bytes = ff_add_bytes_avx2; + } }