avcodec/utvideodsp: add AVX2 versions of the DSP functions

pull/272/head
Martin Vignali 7 years ago
parent 48b7c45b0c
commit b5ebe38443
  1. 20
      libavcodec/x86/utvideodsp.asm
  2. 11
      libavcodec/x86/utvideodsp_init.c

@ -1,6 +1,7 @@
;****************************************************************************** ;******************************************************************************
;* SIMD-optimized UTVideo functions ;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol ;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;* ;*
;* This file is part of FFmpeg. ;* This file is part of FFmpeg.
;* ;*
@ -45,7 +46,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m %define wq r6m
%define hd r7mp %define hd r7mp
%endif %endif
%if mmsize == 32
vbroadcasti128 m3, [pb_128]
%else
mova m3, [pb_128] mova m3, [pb_128]
%endif
.nextrow: .nextrow:
mov xq, wq mov xq, wq
@ -72,6 +77,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
INIT_XMM sse2 INIT_XMM sse2
RESTORE_RGB_PLANES RESTORE_RGB_PLANES
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif
%macro RESTORE_RGB_PLANES10 0 %macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
shl wd, 1 shl wd, 1
@ -81,8 +91,13 @@ cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, sr
add src_rq, wq add src_rq, wq
add src_gq, wq add src_gq, wq
add src_bq, wq add src_bq, wq
%if mmsize == 32
vbroadcasti128 m3, [pw_512]
vbroadcasti128 m4, [pw_1023]
%else
mova m3, [pw_512] mova m3, [pw_512]
mova m4, [pw_1023] mova m4, [pw_1023]
%endif
neg wq neg wq
%if ARCH_X86_64 == 0 %if ARCH_X86_64 == 0
mov wm, wq mov wm, wq
@ -117,3 +132,8 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
INIT_XMM sse2 INIT_XMM sse2
RESTORE_RGB_PLANES10 RESTORE_RGB_PLANES10
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif

@ -28,9 +28,16 @@
void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b, void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height); ptrdiff_t linesize_b, int width, int height);
void ff_restore_rgb_planes_avx2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b, void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height); ptrdiff_t linesize_b, int width, int height);
void ff_restore_rgb_planes10_avx2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c) av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
{ {
@ -40,4 +47,8 @@ av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
c->restore_rgb_planes = ff_restore_rgb_planes_sse2; c->restore_rgb_planes = ff_restore_rgb_planes_sse2;
c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2; c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
} }
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->restore_rgb_planes = ff_restore_rgb_planes_avx2;
c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
}
} }

Loading…
Cancel
Save