mirror of https://github.com/FFmpeg/FFmpeg.git
parent
d11d78facb
commit
5024a82e95
5 changed files with 432 additions and 55 deletions
@ -0,0 +1,72 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVFILTER_BWDIF_H |
||||
#define AVFILTER_BWDIF_H |
||||
|
||||
#include "libavutil/pixdesc.h" |
||||
#include "avfilter.h" |
||||
|
||||
/**
 * Output cadence of the filter (stored in BWDIFContext.mode).
 */
enum BWDIFMode {
    BWDIF_MODE_SEND_FRAME = 0, ///< send 1 frame for each frame
    BWDIF_MODE_SEND_FIELD = 1, ///< send 1 frame for each field
};
||||
|
||||
/**
 * Field order of the input (stored in BWDIFContext.parity).
 */
enum BWDIFParity {
    BWDIF_PARITY_TFF  =  0, ///< top field first
    BWDIF_PARITY_BFF  =  1, ///< bottom field first
    BWDIF_PARITY_AUTO = -1, ///< auto detection
};
||||
|
||||
/**
 * Which frames get deinterlaced (stored in BWDIFContext.deint).
 */
enum BWDIFDeint {
    BWDIF_DEINT_ALL        = 0, ///< deinterlace all frames
    BWDIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced
};
||||
|
||||
typedef struct BWDIFContext { |
||||
const AVClass *class; |
||||
|
||||
int mode; ///< BWDIFMode
|
||||
int parity; ///< BWDIFParity
|
||||
int deint; ///< BWDIFDeint
|
||||
|
||||
int frame_pending; |
||||
|
||||
AVFrame *cur; |
||||
AVFrame *next; |
||||
AVFrame *prev; |
||||
AVFrame *out; |
||||
|
||||
void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs, |
||||
int prefs3, int mrefs3, int parity, int clip_max); |
||||
void (*filter_line)(void *dst, void *prev, void *cur, void *next, |
||||
int w, int prefs, int mrefs, int prefs2, int mrefs2, |
||||
int prefs3, int mrefs3, int prefs4, int mrefs4, |
||||
int parity, int clip_max); |
||||
void (*filter_edge)(void *dst, void *prev, void *cur, void *next, |
||||
int w, int prefs, int mrefs, int prefs2, int mrefs2, |
||||
int parity, int clip_max, int spat); |
||||
|
||||
const AVPixFmtDescriptor *csp; |
||||
int inter_field; |
||||
int eof; |
||||
} BWDIFContext; |
||||
|
||||
/**
 * Install x86 SIMD versions of BWDIFContext.filter_line, chosen from the
 * runtime CPU flags and the bit depth described by bwdif->csp.
 *
 * @param bwdif filter context whose filter_line pointer may be replaced
 */
void ff_bwdif_init_x86(BWDIFContext *bwdif);
||||
|
||||
#endif /* AVFILTER_BWDIF_H */ |
@ -0,0 +1,266 @@ |
||||
;***************************************************************************** |
||||
;* x86-optimized functions for bwdif filter |
||||
;* |
||||
;* Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA |
||||
|
||||
; Word constants used by the FILTER macro.
; pw_coefhf   - interleaved word pairs fed to pmaddwd
; pw_coefhf1  - single weight, applied with pmullw/pmulhw to get 32-bit products
; pw_coefsp   - interleaved word pairs; base weights for the final pmaddwd
; pw_splfdif  - masked per lane and added to pw_coefsp before that pmaddwd
pw_coefhf:  times 4 dw  1016, 5570
pw_coefhf1: times 8 dw -3801
pw_coefsp:  times 4 dw  5077, -981
pw_splfdif: times 4 dw  -768,  768
||||
|
||||
SECTION .text |
||||
|
||||
; LOAD8 dst, src
; Load half a register of bytes from src and interleave them with m7.
; FILTER keeps m7 zeroed (pxor) whenever this macro is used, so the bytes
; end up widened to words.
%macro LOAD8 2
    movh         %1, %2
    punpcklbw    %1, m7
%endmacro
||||
|
||||
; LOAD12 dst, src
; Unaligned load of a full register; samples are already stored as words.
%macro LOAD12 2
    movu         %1, %2
%endmacro
||||
|
||||
; DISP8
; Pack the words in m2 to bytes with unsigned saturation and store the low
; half of the register at [dstq].
%macro DISP8 0
    packuswb     m2, m2
    movh     [dstq], m2
%endmacro
||||
|
||||
; DISP12
; Clip the words in m2 with CLIPW (x86util.asm) against m7 and m12, then
; store the full register at [dstq]. FILTER zeroes m7 just before this macro
; and BWDIF loads m12 with clip_max broadcast to every word.
%macro DISP12 0
    CLIPW        m2, m7, m12
    movu     [dstq], m2
%endmacro
||||
|
||||
; FILTER label, srcA, srcB, depth, scale
;   %1  suffix for the local loop label (PROC passes 0 or 1)
;   %2  first source pointer  (PROC passes prevq or curq)
;   %3  second source pointer (PROC passes curq or nextq)
;   %4  8 or 12, selects the LOAD8/DISP8 or LOAD12/DISP12 helpers
;   %5  multiplier applied to the line offsets (PROC passes 1 or 2)
;
; t0/t1 hold prefs/mrefs and, on x86-64, t2/t3 hold prefs3/mrefs3 (see PROC).
; m8-m11 are scratch values kept across the stages; on x86-32 they are stack
; slots defined in PROC. Each iteration handles mmsize/2 samples and the loop
; runs until the w argument drops to zero or below.
%macro FILTER 5
    pxor         m7, m7                 ; m7 = 0, needed by LOAD8
.loop%1:
    ; m0 = cur[t0], m1 = cur[t1]
    LOAD%4       m0, [curq+t0*%5]
    LOAD%4       m1, [curq+t1*%5]
    ; m8 = %2[0] + %3[0], m9 = |%2[0] - %3[0]|
    LOAD%4       m2, [%2]
    LOAD%4       m3, [%3]
    mova         m4, m3
    paddw        m3, m2
    psubw        m2, m4
    ABS1         m2, m4
    mova         m8, m3
    mova         m9, m2
    ; m3 = (|prev[t0] - m0| + |prev[t1] - m1|) >> 1
    LOAD%4       m3, [prevq+t0*%5]
    LOAD%4       m4, [prevq+t1*%5]
    psubw        m3, m0
    psubw        m4, m1
    ABS2         m3, m4, m5, m6
    paddw        m3, m4
    psrlw        m2, 1
    psrlw        m3, 1
    pmaxsw       m2, m3
    ; same with next, m2 = running maximum of the three differences
    LOAD%4       m3, [nextq+t0*%5]
    LOAD%4       m4, [nextq+t1*%5]
    psubw        m3, m0
    psubw        m4, m1
    ABS2         m3, m4, m5, m6
    paddw        m3, m4
    psrlw        m3, 1
    pmaxsw       m2, m3

    ; samples two line offsets away in %2/%3; m10 = sum of all four
    LOAD%4       m3, [%2+t0*2*%5]
    LOAD%4       m4, [%3+t0*2*%5]
    LOAD%4       m5, [%2+t1*2*%5]
    LOAD%4       m6, [%3+t1*2*%5]
    paddw        m3, m4
    paddw        m5, m6
    mova         m6, m3
    paddw        m6, m5
    mova        m10, m6
    psrlw        m3, 1
    psrlw        m5, 1
    psubw        m3, m0
    psubw        m5, m1
    mova         m6, m3
    pminsw       m3, m5
    pmaxsw       m5, m6
    mova         m4, m8
    psraw        m4, 1
    mova         m6, m4
    psubw        m6, m0
    psubw        m4, m1
    pmaxsw       m3, m6
    pminsw       m5, m6
    pmaxsw       m3, m4
    pminsw       m5, m4
    mova         m6, m7
    psubw        m6, m3
    pmaxsw       m6, m5
    ; only lanes where m2 > 0 take the extra bound; result saved in m11
    mova         m3, m2
    pcmpgtw      m3, m7
    pand         m6, m3
    pmaxsw       m2, m6
    mova        m11, m2

    ; samples four line offsets away in %2/%3, summed and paired with m8
    ; for pmaddwd with pw_coefhf; m2/m3 = low/high 32-bit lanes
    LOAD%4       m2, [%2+t0*4*%5]
    LOAD%4       m3, [%3+t0*4*%5]
    LOAD%4       m4, [%2+t1*4*%5]
    LOAD%4       m5, [%3+t1*4*%5]
    paddw        m2, m3
    paddw        m4, m5
    paddw        m2, m4
    mova         m3, m2
    punpcklwd    m2, m8
    punpckhwd    m3, m8
    pmaddwd      m2, [pw_coefhf]
    pmaddwd      m3, [pw_coefhf]
    ; 32-bit products m10 * pw_coefhf1 built from pmullw (low) / pmulhw (high)
    mova         m4, m10
    mova         m6, m4
    pmullw       m4, [pw_coefhf1]
    pmulhw       m6, [pw_coefhf1]
    mova         m5, m4
    punpcklwd    m4, m6
    punpckhwd    m5, m6
    paddd        m2, m4
    paddd        m3, m5
    psrad        m2, 2
    psrad        m3, 2

    ; m0 = cur[t0] + cur[t1], m6 = cur[prefs3] + cur[mrefs3]
    mova         m4, m0
    paddw        m0, m1
%if ARCH_X86_64
    LOAD%4       m5, [curq+t2*%5]
    LOAD%4       m6, [curq+t3*%5]
%else
    ; only two temporaries on x86-32: borrow them for prefs3/mrefs3, then
    ; reload prefs/mrefs for the next iteration
    mov          r4, prefs3mp
    mov          r5, mrefs3mp
    LOAD%4       m5, [curq+t0*%5]
    LOAD%4       m6, [curq+t1*%5]
    mov          r4, prefsmp
    mov          r5, mrefsmp
%endif
    paddw        m6, m5
    ; mask = |cur[t1] - cur[t0]| > m9, widened to dword lanes (m1 low, m4 high)
    psubw        m1, m4
    ABS1         m1, m4
    pcmpgtw      m1, m9
    mova         m4, m1
    punpcklwd    m1, m4
    punpckhwd    m4, m4
    ; the mask gates the previous 32-bit term and the pw_splfdif adjustment
    pand         m2, m1
    pand         m3, m4
    mova         m5, [pw_splfdif]
    mova         m7, m5                 ; m7 is no longer zero from here on
    pand         m5, m1
    pand         m7, m4
    paddw        m5, [pw_coefsp]
    paddw        m7, [pw_coefsp]
    mova         m4, m0
    punpcklwd    m0, m6
    punpckhwd    m4, m6
    pmaddwd      m0, m5
    pmaddwd      m4, m7
    paddd        m2, m0
    paddd        m3, m4
    psrad        m2, 13
    psrad        m3, 13
    packssdw     m2, m3

    ; clip the result to (m8 >> 1) -/+ m11, then store
    mova         m4, m8
    psraw        m4, 1
    mova         m0, m11
    mova         m3, m4
    psubw        m4, m0
    paddw        m3, m0
    CLIPW        m2, m4, m3
    pxor         m7, m7                 ; restore zero for DISP12 and LOAD8
    DISP%4

    add        dstq, STEP
    add       prevq, STEP
    add        curq, STEP
    add       nextq, STEP
    sub    DWORD wm, mmsize/2
    jg .loop%1
%endmacro
||||
|
||||
; PROC depth, scale
;   %1  8 or 12, forwarded to FILTER as its 4th argument
;   %2  offset multiplier, forwarded to FILTER as its 5th argument
;
; Loads the line offsets into temporaries, then runs FILTER on prev/cur when
; the parity argument is non-zero and on cur/next when it is zero.
%macro PROC 2
%if ARCH_X86_64
    ; sign-extend the int offsets so they can be used in addresses
    movsxd       r5, DWORD prefsm
    movsxd       r6, DWORD mrefsm
    movsxd       r7, DWORD prefs3m
    movsxd       r8, DWORD mrefs3m
    DECLARE_REG_TMP 5, 6, 7, 8
%else
    ; m8-m11 do not exist as registers here; use the stack area reserved
    ; by cglobal instead
    %define m8 [rsp+ 0]
    %define m9 [rsp+16]
    %define m10 [rsp+32]
    %define m11 [rsp+48]
    mov          r4, prefsmp
    mov          r5, mrefsmp
    DECLARE_REG_TMP 4, 5
%endif
    cmp DWORD paritym, 0
    je .parity0
    FILTER 1, prevq, curq, %1, %2
    jmp .ret
.parity0:
    FILTER 0, curq, nextq, %1, %2
.ret:
    RET
%endmacro
||||
|
||||
; BWDIF
; Emits bwdif_filter_line and bwdif_filter_line_12bit for the instruction set
; selected by the preceding INIT_* macro. The argument list matches the
; filter_line prototype declared in the C sources.
%macro BWDIF 0
%if ARCH_X86_64
cglobal bwdif_filter_line, 4, 9, 12, 0, dst, prev, cur, next, w, prefs, \
                                        mrefs, prefs2, mrefs2, prefs3, mrefs3, \
                                        prefs4, mrefs4, parity, clip_max
%else
; 64 bytes of stack back the m8-m11 slots defined in PROC
cglobal bwdif_filter_line, 4, 6, 8, 64, dst, prev, cur, next, w, prefs, \
                                        mrefs, prefs2, mrefs2, prefs3, mrefs3, \
                                        prefs4, mrefs4, parity, clip_max
%endif
    ; pointer advance per loop iteration: mmsize/2 one-byte samples
    %define STEP mmsize/2
    PROC 8, 1

%if ARCH_X86_64
cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
                                              prefs, mrefs, prefs2, mrefs2, \
                                              prefs3, mrefs3, prefs4, \
                                              mrefs4, parity, clip_max
    ; m12 = clip_max broadcast to every word, used by DISP12
    movd        m12, DWORD clip_maxm
    SPLATW      m12, m12, 0
%else
; 80 bytes of stack: m8-m11 as above plus m12 at [rsp+64]
cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
                                              prefs, mrefs, prefs2, mrefs2, \
                                              prefs3, mrefs3, prefs4, \
                                              mrefs4, parity, clip_max
    %define m12 [rsp+64]
    movd         m0, DWORD clip_maxm
    SPLATW       m0, m0, 0
    mova        m12, m0
%endif
    ; pointer advance per loop iteration: mmsize/2 two-byte samples
    %define STEP mmsize
    PROC 12, 2
%endmacro
||||
|
||||
; Instantiate both functions per instruction set. The MMXEXT build is only
; assembled for 32-bit x86.
INIT_XMM ssse3
BWDIF
INIT_XMM sse2
BWDIF
%if ARCH_X86_32
INIT_MMX mmxext
BWDIF
%endif
@ -0,0 +1,78 @@ |
||||
/*
|
||||
* Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/mem.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavfilter/bwdif.h" |
||||
|
||||
/*
 * Assembly implementations of BWDIFContext.filter_line, one per instruction
 * set. The first group is selected for bit depths up to 8, the _12bit group
 * for bit depths up to 12 (see ff_bwdif_init_x86()).
 */
void ff_bwdif_filter_line_mmxext(void *dst, void *prev, void *cur, void *next,
                                 int w, int prefs, int mrefs, int prefs2,
                                 int mrefs2, int prefs3, int mrefs3, int prefs4,
                                 int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_sse2(void *dst, void *prev, void *cur, void *next,
                               int w, int prefs, int mrefs, int prefs2,
                               int mrefs2, int prefs3, int mrefs3, int prefs4,
                               int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next,
                                int w, int prefs, int mrefs, int prefs2,
                                int mrefs2, int prefs3, int mrefs3, int prefs4,
                                int mrefs4, int parity, int clip_max);

void ff_bwdif_filter_line_12bit_mmxext(void *dst, void *prev, void *cur, void *next,
                                       int w, int prefs, int mrefs, int prefs2,
                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
                                       int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next,
                                     int w, int prefs, int mrefs, int prefs2,
                                     int mrefs2, int prefs3, int mrefs3, int prefs4,
                                     int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *next,
                                      int w, int prefs, int mrefs, int prefs2,
                                      int mrefs2, int prefs3, int mrefs3, int prefs4,
                                      int mrefs4, int parity, int clip_max);
||||
|
||||
/**
 * Replace bwdif->filter_line with an x86 assembly version when the CPU
 * supports one.
 *
 * The bit depth is taken from bwdif->csp->comp[0].depth, or assumed to be 8
 * when csp is NULL. Depths up to 8 use the 8-bit kernels, depths up to 12 use
 * the _12bit kernels, and anything deeper leaves filter_line untouched.
 * Within a group the checks run from the oldest to the newest instruction
 * set, so the last matching assignment (the most capable one) wins.
 *
 * @param bwdif filter context to update
 */
av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
{
    int cpu_flags = av_get_cpu_flags();
    int bit_depth = (!bwdif->csp) ? 8 : bwdif->csp->comp[0].depth;

    if (bit_depth <= 8) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_ssse3;
    } else if (bit_depth <= 12) {
#if ARCH_X86_32
        if (EXTERNAL_MMXEXT(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_12bit_mmxext;
#endif /* ARCH_X86_32 */
        if (EXTERNAL_SSE2(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
        if (EXTERNAL_SSSE3(cpu_flags))
            bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
    }
}
Loading…
Reference in new issue