mirror of https://github.com/FFmpeg/FFmpeg.git
Blend function speedups on x86_64 Core i5 4460:

ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none

C:     447548411 decicycles in Blend, 2048 runs, 0 skips
SSSE3: 130020087 decicycles in Blend, 2048 runs, 0 skips
AVX2:  128508221 decicycles in Blend, 2048 runs, 0 skips

ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f null none

C:     228932745 decicycles in Blend, 2048 runs, 0 skips
SSE4:  123357781 decicycles in Blend, 2048 runs, 0 skips
AVX2:  121215353 decicycles in Blend, 2048 runs, 0 skips

Signed-off-by: Marton Balint <cus@passwd.hu>
pull/277/head
parent
2cbe6bac03
commit
4d95c6d5d7
5 changed files with 268 additions and 54 deletions
@ -0,0 +1,74 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVFILTER_FRAMERATE_H |
||||
#define AVFILTER_FRAMERATE_H |
||||
|
||||
#include "libavutil/pixelutils.h" |
||||
#include "avfilter.h" |
||||
|
||||
/**
 * Argument list shared by every blend kernel. It is a macro so that the
 * kernel prototypes and the blend_func pointer type are always spelled
 * from the same text.
 *
 * The x86 assembly kernels use the arguments as follows: width is added
 * directly to the byte pointers (so it is a byte count), height is the
 * number of rows, and each output sample is
 *     (src1 * factor1 + src2 * factor2 + half) >> shift
 * with shift 7 for the 8-bit kernels and 15 for the 16-bit kernels.
 */
#define BLEND_FUNC_PARAMS                                       \
    const uint8_t *src1, ptrdiff_t src1_linesize,               \
    const uint8_t *src2, ptrdiff_t src2_linesize,               \
    uint8_t *dst, ptrdiff_t dst_linesize,                       \
    ptrdiff_t width, ptrdiff_t height,                          \
    int factor1, int factor2, int half

/*
 * Number of fractional bits of the blend factors. These values equal the
 * final right shifts hard-coded in the x86 assembly kernels (7 and 15).
 * NOTE(review): presumably the factors passed in sum to 1 << depth;
 * the code computing them is not part of this header - confirm.
 */
#define BLEND_FACTOR_DEPTH8   7
#define BLEND_FACTOR_DEPTH16 15

/** Pointer to a blend kernel taking BLEND_FUNC_PARAMS. */
typedef void (*blend_func)(BLEND_FUNC_PARAMS);
||||
|
||||
typedef struct FrameRateContext { |
||||
const AVClass *class; |
||||
// parameters
|
||||
AVRational dest_frame_rate; ///< output frames per second
|
||||
int flags; ///< flags affecting frame rate conversion algorithm
|
||||
double scene_score; ///< score that denotes a scene change has happened
|
||||
int interp_start; ///< start of range to apply linear interpolation
|
||||
int interp_end; ///< end of range to apply linear interpolation
|
||||
|
||||
int line_size[4]; ///< bytes of pixel data per line for each plane
|
||||
int vsub; |
||||
|
||||
AVRational srce_time_base; ///< timebase of source
|
||||
AVRational dest_time_base; ///< timebase of destination
|
||||
|
||||
av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function (scene detect only)
|
||||
double prev_mafd; ///< previous MAFD (scene detect only)
|
||||
|
||||
int blend_factor_max; |
||||
int bitdepth; |
||||
AVFrame *work; |
||||
|
||||
AVFrame *f0; ///< last frame
|
||||
AVFrame *f1; ///< current frame
|
||||
int64_t pts0; ///< last frame pts in dest_time_base
|
||||
int64_t pts1; ///< current frame pts in dest_time_base
|
||||
int64_t delta; ///< pts1 to pts0 delta
|
||||
double score; ///< scene change score (f0 to f1)
|
||||
int flush; ///< 1 if the filter is being flushed
|
||||
int64_t start_pts; ///< pts of the first output frame
|
||||
int64_t n; ///< output frame counter
|
||||
|
||||
blend_func blend; |
||||
} FrameRateContext; |
||||
|
||||
/**
 * Set up the blend function of a filter context.
 * NOTE(review): the implementation is not part of this header; presumably
 * it installs the C kernels and then calls the arch-specific init - confirm.
 */
void ff_framerate_init(FrameRateContext *s);

/**
 * x86 hook: replace s->blend with an SSSE3/SSE4/AVX2 kernel when the running
 * CPU supports one. The choice depends on s->bitdepth, which must therefore
 * be set before the call. s->blend is left untouched when nothing matches.
 */
void ff_framerate_init_x86(FrameRateContext *s);
||||
|
||||
#endif /* AVFILTER_FRAMERATE_H */ |
@ -0,0 +1,134 @@ |
||||
;***************************************************************************** |
||||
;* x86-optimized functions for framerate filter |
||||
;* |
||||
;* Copyright (C) 2018 Marton Balint |
||||
;* |
||||
;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION .text |
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; XSPLAT dst, src, size
;   Fill every element of vector register dst with the low element of src.
;     %1 = destination vector register
;     %2 = source operand (the callers in this file pass stack arguments, rNm)
;     %3 = element size suffix; this file uses w (word) and d (dword)
;   With AVX2 a single vpbroadcast%3 is emitted. Otherwise the value is moved
;   in with movd and then replicated: SPLATD for d, SPLATW for anything else.
;-----------------------------------------------------------------------------
%macro XSPLAT 3
%if notcpuflag(avx2)
    movd                 %1, %2
%ifnidn %3, d
    SPLATW               %1, %1
%else
    SPLATD               %1
%endif
%else
    vpbroadcast%3        %1, %2
%endif
%endmacro
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; BLEND_INIT [suffix]
;   Opens the function blend_frames<suffix> (arguments: BLEND_FUNC_PARAMS) and
;   prepares the row loop:
;     - x86-64: six arguments are taken in registers, width is loaded from its
;       argument slot (low 32 bits only).
;     - x86-32: five arguments are taken in registers; dst_linesize and width
;       are used directly from their stack slots (r5mp / r6mp), so the final
;       neg rewrites the width slot in place.
;     - end  = low 32 bits of argument 7 (height). r7m is used rather than a
;       named alias because "end" occupies a different register index on the
;       two architectures.
;     - all three pointers are advanced by width and width is negated, so the
;       inner loop can count a negative byte offset up towards zero.
;-----------------------------------------------------------------------------
%macro BLEND_INIT 0-1
%if ARCH_X86_64
cglobal blend_frames%1, 6, 9, 5, src1, src1_linesize, src2, src2_linesize, dst, dst_linesize, width, end, x
    mov         widthd, dword widthm
%else
cglobal blend_frames%1, 5, 7, 5, src1, src1_linesize, src2, src2_linesize, dst, end, x
%define dst_linesizeq r5mp
%define widthq r6mp
%endif
    mov           endd, dword r7m           ; rows left to process
    add           dstq, widthq              ; point each plane at the end of its first row
    add          src2q, widthq
    add          src1q, widthq
    neg         widthq                      ; widthq = -width (bytes)
%endmacro
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; BLEND_LOOP in_size, acc_size, madd_suffix, shift
;   Row/column loop and return of a blend kernel.
;     %1 = sample size suffix (b or w)
;     %2 = accumulator size suffix (w or d)
;     %3 = suffix of the pmadd instruction (ubsw or wd)
;     %4 = final right shift
;   Expects: m2 = interleaved factors (factor1 low, factor2 high),
;            m3 = rounding constant, pointers/width/end as left by BLEND_INIT.
;   Per element: dst = (src1 * factor1 + src2 * factor2 + half) >> %4.
;   m4 is scratch for SBUTTERFLY.
;
;   Both loops are bottom-tested: at least one vector of one row is always
;   processed, and each row is handled in whole mmsize-byte vectors.
;   NOTE(review): this assumes width > 0, height > 0 and rows that may be
;   read/written up to the next multiple of mmsize - confirm with the caller.
;   With ymm registers the unpack and pack steps both work per 128-bit lane,
;   so the pack still restores the original element order.
;-----------------------------------------------------------------------------
%macro BLEND_LOOP 4
.nextrow:
    mov             xq, widthq              ; x = -width, counts up to 0

    .loop:
        movu            m1, [src2q + xq]
        movu            m0, [src1q + xq]
        SBUTTERFLY    %1%2, 0, 1, 4         ; m0 = aAbBcCdD  (src1/src2 interleaved)
                                            ; m1 = eEfFgGhH
        pmadd%3         m1, m2              ; e*factor1 + E*factor2, ...
        pmadd%3         m0, m2              ; a*factor1 + A*factor2, ...

        padd%2          m1, m3              ; + half (rounding)
        padd%2          m0, m3
        psrl%2          m1, %4              ; 0E0F0G0H
        psrl%2          m0, %4              ; 0A0B0C0D

        packus%2%1      m0, m1              ; ABCDEFGH
        movu   [dstq + xq], m0
        add             xq, mmsize
    jl .loop                                ; until x reaches/passes 0
    add           dstq, dst_linesizeq       ; step all planes to the next row
    add          src2q, src2_linesizeq
    add          src1q, src1_linesizeq
    sub           endd, 1
    jg .nextrow
REP_RET
%endmacro
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; BLEND_FRAMES
;   Emits blend_frames for the current INIT_* instruction set: 8-bit samples,
;   word accumulators, final shift of 7.
;   Arguments 8/9/10 of BLEND_FUNC_PARAMS are factor1/factor2/half.
;   Each word of m2 becomes factor1 | (factor2 << 8), matching the
;   src1/src2 byte interleave produced in BLEND_LOOP.
;   NOTE(review): pmaddubsw reads the factor bytes as signed and saturates the
;   sum, so factors are expected to fit in 0..127 - confirm with the caller.
;-----------------------------------------------------------------------------
%macro BLEND_FRAMES 0
BLEND_INIT

    XSPLAT              m3, r9m, w                  ; factor2 in every word
    XSPLAT              m2, r8m, w                  ; factor1 in every word

    psllw               m3, 8                       ; factor2 -> high byte
    por                 m2, m3                      ; interleaved factors

    XSPLAT              m3, r10m, w                 ; half (rounding term)

BLEND_LOOP  b, w, ubsw, 7
%endmacro
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; BLEND_FRAMES16
;   Emits blend_frames16 for the current INIT_* instruction set: 16-bit
;   samples, dword accumulators, final shift of 15.
;   Arguments 8/9/10 of BLEND_FUNC_PARAMS are factor1/factor2/half.
;   Each dword of m2 becomes factor1 | (factor2 << 16), matching the
;   src1/src2 word interleave produced in BLEND_LOOP.
;   NOTE(review): pmaddwd treats both samples and factors as signed words, so
;   samples >= 0x8000 would be misread - confirm that callers only use this
;   for sample depths below 16 bits.
;-----------------------------------------------------------------------------
%macro BLEND_FRAMES16 0
BLEND_INIT 16

    XSPLAT              m3, r9m, d                  ; factor2 in every dword
    XSPLAT              m2, r8m, d                  ; factor1 in every dword

    pslld               m3, 16                      ; factor2 -> high word
    por                 m2, m3                      ; interleaved factors

    XSPLAT              m3, r10m, d                 ; half (rounding term)

BLEND_LOOP  w, d, wd, 15
%endmacro
||||
|
||||
|
||||
; Kernel instantiations. The C side declares the results as
; ff_blend_frames_ssse3, ff_blend_frames16_sse4, ff_blend_frames_avx2 and
; ff_blend_frames16_avx2.

; 8-bit, 128-bit vectors: pmaddubsw needs SSSE3
INIT_XMM ssse3
BLEND_FRAMES

; 16-bit, 128-bit vectors: packusdw needs SSE4.1
INIT_XMM sse4
BLEND_FRAMES16

; 256-bit versions of both kernels, only when the assembler supports AVX2
%if HAVE_AVX2_EXTERNAL

INIT_YMM avx2
BLEND_FRAMES
BLEND_FRAMES16

%endif
@ -0,0 +1,42 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavfilter/framerate.h" |
||||
|
||||
/* Assembly blend kernels for 8-bit samples */
void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);

/* Assembly blend kernels for samples stored in 16 bits */
void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
||||
|
||||
/**
 * Install the fastest available x86 blend kernel into s->blend.
 *
 * AVX2 (fast variant) is preferred for both depths. Without it, 8-bit
 * content falls back to SSSE3 and every other bit depth to SSE4. When none
 * of the required CPU features is present, s->blend is not modified.
 *
 * @param s filter context; s->bitdepth is read, s->blend may be written
 */
void ff_framerate_init_x86(FrameRateContext *s)
{
    const int cpu_flags = av_get_cpu_flags();
    const int is_8bit   = s->bitdepth == 8;

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        s->blend = is_8bit ? ff_blend_frames_avx2 : ff_blend_frames16_avx2;
        return;
    }

    if (is_8bit) {
        if (EXTERNAL_SSSE3(cpu_flags))
            s->blend = ff_blend_frames_ssse3;
    } else if (EXTERNAL_SSE4(cpu_flags)) {
        s->blend = ff_blend_frames16_sse4;
    }
}
Loading…
Reference in new issue