diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm index 4d84e6d011..276fe347f5 100644 --- a/libavfilter/x86/vf_gblur.asm +++ b/libavfilter/x86/vf_gblur.asm @@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max VBROADCASTSS m1, minm VBROADCASTSS m2, maxm %elif WIN64 - SWAP 0, 2 - SWAP 1, 3 - VBROADCASTSS m0, xm0 - VBROADCASTSS m1, xm1 + VBROADCASTSS m0, xmm2 + VBROADCASTSS m1, xmm3 VBROADCASTSS m2, maxm -%else ; UNIX64 - VBROADCASTSS m0, xm0 - VBROADCASTSS m1, xm1 - VBROADCASTSS m2, xm2 +%else ; UNIX + VBROADCASTSS m0, xmm0 + VBROADCASTSS m1, xmm1 + VBROADCASTSS m2, xmm2 %endif .loop: -%if cpuflag(avx2) +%if cpuflag(avx2) || cpuflag(avx512) mulps m3, m0, [ptrq + lengthq] %else movu m3, [ptrq + lengthq] @@ -229,3 +227,8 @@ POSTSCALE_SLICE INIT_YMM avx2 POSTSCALE_SLICE %endif + +%if HAVE_AVX512_EXTERNAL +INIT_ZMM avx512 +POSTSCALE_SLICE +%endif diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c index d80fb46fe4..34aba4ca6e 100644 --- a/libavfilter/x86/vf_gblur_init.c +++ b/libavfilter/x86/vf_gblur_init.c @@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max); void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); +void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max); av_cold void ff_gblur_init_x86(GBlurContext *s) { @@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s) if (EXTERNAL_AVX2(cpu_flags)) { s->horiz_slice = ff_horiz_slice_avx2; } + if (EXTERNAL_AVX512(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_avx512; + } #endif }