libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()

Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
pull/365/head
Authored by Wu Jianhua 4 years ago; committed by Paul B Mahol
parent fdc0bb78fe
commit 4a5e24721c
  1. libavfilter/x86/vf_gblur.asm (21 lines changed)
  2. libavfilter/x86/vf_gblur_init.c (4 lines changed)

@@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
VBROADCASTSS m1, minm
VBROADCASTSS m2, maxm
%elif WIN64
SWAP 0, 2
SWAP 1, 3
VBROADCASTSS m0, xm0
VBROADCASTSS m1, xm1
VBROADCASTSS m0, xmm2
VBROADCASTSS m1, xmm3
VBROADCASTSS m2, maxm
%else ; UNIX64
VBROADCASTSS m0, xm0
VBROADCASTSS m1, xm1
VBROADCASTSS m2, xm2
%else ; UNIX
VBROADCASTSS m0, xmm0
VBROADCASTSS m1, xmm1
VBROADCASTSS m2, xmm2
%endif
.loop:
%if cpuflag(avx2)
%if cpuflag(avx2) || cpuflag(avx512)
mulps m3, m0, [ptrq + lengthq]
%else
movu m3, [ptrq + lengthq]
@@ -229,3 +227,8 @@ POSTSCALE_SLICE
INIT_YMM avx2
POSTSCALE_SLICE
%endif
%if HAVE_AVX512_EXTERNAL
INIT_ZMM avx512
POSTSCALE_SLICE
%endif

@@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
av_cold void ff_gblur_init_x86(GBlurContext *s)
{
@@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
if (EXTERNAL_AVX2(cpu_flags)) {
s->horiz_slice = ff_horiz_slice_avx2;
}
if (EXTERNAL_AVX512(cpu_flags)) {
s->postscale_slice = ff_postscale_slice_avx512;
}
#endif
}

Loading…
Cancel
Save