From b5ebe38443542c4d6ca285026670512da482d8e5 Mon Sep 17 00:00:00 2001 From: Martin Vignali Date: Tue, 21 Nov 2017 08:34:20 +0100 Subject: [PATCH] avcodec/utvideodsp : add avx2 version for the dsp --- libavcodec/x86/utvideodsp.asm | 20 ++++++++++++++++++++ libavcodec/x86/utvideodsp_init.c | 11 +++++++++++ 2 files changed, 31 insertions(+) diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm index 55ef127ccb..b67a509dda 100644 --- a/libavcodec/x86/utvideodsp.asm +++ b/libavcodec/x86/utvideodsp.asm @@ -1,6 +1,7 @@ ;****************************************************************************** ;* SIMD-optimized UTVideo functions ;* Copyright (c) 2017 Paul B Mahol +;* Copyright (c) 2017 Jokyo Images ;* ;* This file is part of FFmpeg. ;* @@ -45,7 +46,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x %define wq r6m %define hd r7mp %endif +%if mmsize == 32 + vbroadcasti128 m3, [pb_128] +%else mova m3, [pb_128] +%endif .nextrow: mov xq, wq @@ -72,6 +77,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x INIT_XMM sse2 RESTORE_RGB_PLANES +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +RESTORE_RGB_PLANES +%endif + %macro RESTORE_RGB_PLANES10 0 cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x shl wd, 1 @@ -81,8 +91,13 @@ cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, sr add src_rq, wq add src_gq, wq add src_bq, wq +%if mmsize == 32 + vbroadcasti128 m3, [pw_512] + vbroadcasti128 m4, [pw_1023] +%else mova m3, [pw_512] mova m4, [pw_1023] +%endif neg wq %if ARCH_X86_64 == 0 mov wm, wq @@ -117,3 +132,8 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x INIT_XMM sse2 RESTORE_RGB_PLANES10 + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +RESTORE_RGB_PLANES10 +%endif diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c index f8b2a9b074..2b436c6c5c 100644 --- a/libavcodec/x86/utvideodsp_init.c +++ b/libavcodec/x86/utvideodsp_init.c @@ -28,9 +28,16 @@ void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b, ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b, int width, int height); +void ff_restore_rgb_planes_avx2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b, + ptrdiff_t linesize_r, ptrdiff_t linesize_g, + ptrdiff_t linesize_b, int width, int height); + void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b, ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b, int width, int height); +void ff_restore_rgb_planes10_avx2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b, + ptrdiff_t linesize_r, ptrdiff_t linesize_g, + ptrdiff_t linesize_b, int width, int height); av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c) { @@ -40,4 +47,8 @@ av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c) c->restore_rgb_planes = ff_restore_rgb_planes_sse2; c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2; } + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + c->restore_rgb_planes = ff_restore_rgb_planes_avx2; + c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2; + } }