mirror of https://github.com/FFmpeg/FFmpeg.git
parent
eb17bf6fd3
commit
295d99b439
5 changed files with 241 additions and 7 deletions
@ -0,0 +1,36 @@ |
||||
/*
|
||||
* Copyright (c) 2019 Paul B Mahol |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVFILTER_ATADENOISE_H |
||||
#define AVFILTER_ATADENOISE_H |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
|
||||
/**
 * Table of row-filter entry points used by the atadenoise filter.
 */
typedef struct ATADenoiseDSPContext {
    /**
     * Filter one row of samples.
     *
     * Parameter roles below are as used by the SSE4 implementation.
     *
     * @param src  row being filtered (the reference samples)
     * @param dst  output row
     * @param srcf array of row pointers, walked outwards from index mid
     * @param w    row width in samples
     * @param mid  index into srcf from which the walk starts
     * @param size number of entries in srcf
     * @param thra limit on a single absolute difference to the reference
     * @param thrb limit on the accumulated absolute difference per side
     */
    void (*filter_row)(const uint8_t *src,
                       uint8_t *dst,
                       const uint8_t **srcf,
                       int w,
                       int mid,
                       int size,
                       int thra,
                       int thrb);
} ATADenoiseDSPContext;
||||
|
||||
/**
 * Install x86-specific implementations into the DSP context.
 *
 * @param dsp   context whose function pointers may be overridden
 * @param depth sample bit depth of the input
 */
void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp,
                            int depth);
||||
|
||||
#endif /* AVFILTER_ATADENOISE_H */ |
@ -0,0 +1,154 @@ |
||||
;***************************************************************************** |
||||
;* x86-optimized functions for atadenoise filter
||||
;* |
||||
;* Copyright (C) 2019 Paul B Mahol |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%if ARCH_X86_64 |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA

; eight words of 1: initial per-lane sample count (the reference sample itself)
pw_one:  times 8 dw 1
; eight words with every bit set: "lane still active" mask / NOT operand
pw_ones: times 8 dw 0xFFFF
||||
|
||||
SECTION .text |
||||
|
||||
;------------------------------------------------------------------------------ |
||||
; void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst,
;                                     const uint8_t **srcf,
;                                     int w, int mid, int size,
;                                     int thra, int thrb)
||||
;------------------------------------------------------------------------------ |
||||
|
||||
INIT_XMM sse4
cglobal atadenoise_filter_row8, 8,10,13, src, dst, srcf, w, mid, size, i, j, srcfx, x
    ; Processes 8 samples (mmsize/2) per outer iteration.
    ;
    ; Register roles:
    ;   x      negative byte offset from the end of the row, counts up to 0
    ;   i / j  indices into srcf, walking up / down from mid
    ;   m0     reference samples from src, widened to words
    ;   m2     zero (used to widen words to dwords)
    ;   m3     accumulated |diff| on the i side
    ;   m11    accumulated |diff| on the j side
    ;   m4     thra broadcast to every word
    ;   m5     thrb broadcast to every word
    ;   m7     running sum of accepted samples (starts with the reference)
    ;   m8     running count of accepted samples (starts at 1)
    ;   m10    all-ones constant
    ;   m12    per-lane "still accepting" mask; once cleared it stays cleared
    ;
    ; NOTE(review): .loop0 runs its body before testing i, and j is never
    ; bounds-checked, so this presumably relies on mid >= 1, mid + 1 < size
    ; and a symmetric window -- confirm against the caller.
    ; NOTE(review): each iteration stores 8 bytes, so up to 7 bytes past w
    ; are written when w is not a multiple of 8 -- assumes padded rows.
    movsxdifnidn    wq, wd
    movsxdifnidn  midq, midd
    movsxdifnidn sizeq, sized
    add           srcq, wq              ; src and dst now point at the row end
    add           dstq, wq              ; and are addressed as [ptr + x]
    mov             xq, wq
    dec          sizeq                  ; size - 1, compared against i below
    neg             xq                  ; x = -w
    movd            m4, r6m             ; thra
    SPLATW          m4, m4
    movd            m5, r7m             ; thrb
    SPLATW          m5, m5
    pxor            m2, m2
    mova           m10, [pw_ones]

.loop:
    mov             iq, midq
    mov             jq, midq
    pxor            m3, m3
    pxor           m11, m11
    pmovzxbw        m0, [srcq + xq]     ; load exactly 8 bytes, widen to words
    mova            m7, m0              ; sum   = reference sample
    mova            m8, [pw_one]        ; count = 1
    mova           m12, m10             ; every lane starts out active

.loop0:
    inc             iq
    dec             jq

    ; ---- j side: srcf[j] ----
    mov         srcfxq, [srcfq + jq * 8]
    add         srcfxq, wq

    pmovzxbw        m1, [srcfxq + xq]
    mova            m9, m1              ; keep the sample for the sum
    psubw           m1, m0
    pabsw           m1, m1              ; |sample - reference|
    paddw          m11, m1              ; accumulate j-side difference
    pcmpgtw         m1, m4              ; diff    > thra ?
    mova            m6, m11
    pcmpgtw         m6, m5              ; sumdiff > thrb ?
    por             m6, m1
    pxor            m6, m10             ; invert: lanes that passed both tests
    pand           m12, m6              ; a failed lane stays disabled
    pand            m9, m12
    paddw           m7, m9              ; add sample where still active
    mova            m6, m12
    psrlw           m6, 15              ; 0xFFFF -> 1, 0 -> 0
    paddw           m8, m6              ; count += 1 where still active

    ; ---- i side: srcf[i] ----
    mov         srcfxq, [srcfq + iq * 8]
    add         srcfxq, wq

    pmovzxbw        m1, [srcfxq + xq]
    mova            m9, m1
    psubw           m1, m0
    pabsw           m1, m1
    paddw           m3, m1              ; accumulate i-side difference
    pcmpgtw         m1, m4
    mova            m6, m3
    pcmpgtw         m6, m5
    por             m6, m1
    pxor            m6, m10
    pand           m12, m6
    pand            m9, m12
    paddw           m7, m9
    mova            m6, m12
    psrlw           m6, 15
    paddw           m8, m6

    ptest          m12, m12             ; no lane active any more -> done
    jz .finish

    cmp             iq, sizeq
    jl .loop0

.finish:
    ; dst = (sum + (count >> 1)) / count, computed in single precision
    mova            m9, m8
    psrlw           m9, 1
    paddw           m7, m9

    mova            m1, m7
    mova            m6, m8

    punpcklwd       m7, m2              ; low four lanes -> dwords
    punpcklwd       m8, m2
    cvtdq2ps        m7, m7
    cvtdq2ps        m8, m8
    divps           m7, m8
    cvttps2dq       m7, m7

    punpckhwd       m1, m2              ; high four lanes -> dwords
    punpckhwd       m6, m2
    cvtdq2ps        m1, m1
    cvtdq2ps        m6, m6
    divps           m1, m6
    cvttps2dq       m1, m1

    packssdw        m7, m1              ; 8 words: low half, then high half
    packuswb        m7, m7
    movh   [dstq + xq], m7              ; one 8-byte store for all 8 samples

    add             xq, mmsize/2
    jl .loop
    RET
||||
|
||||
%endif |
@ -0,0 +1,40 @@ |
||||
/*
|
||||
* Copyright (C) 2019 Paul B Mahol |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/mem.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavfilter/atadenoise.h" |
||||
|
||||
/* Row filter implemented in assembly; has the ATADenoiseDSPContext.filter_row signature. */
void ff_atadenoise_filter_row8_sse4(const uint8_t *src,
                                    uint8_t *dst,
                                    const uint8_t **srcf,
                                    int w,
                                    int mid,
                                    int size,
                                    int thra,
                                    int thrb);
||||
|
||||
/**
 * Select x86 implementations for the atadenoise DSP context.
 *
 * The SSE4 row filter is installed only on x86-64 builds, when the CPU
 * reports external SSE4 support and the sample depth is at most 8 bits.
 * Otherwise dsp is left untouched.
 */
av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth)
{
    const int cpu_flags = av_get_cpu_flags();

    /* ARCH_X86_64 stays a literal first operand so the reference to the
     * asm symbol is compiled out on other targets. */
    if (ARCH_X86_64 && depth <= 8) {
        if (EXTERNAL_SSE4(cpu_flags))
            dsp->filter_row = ff_atadenoise_filter_row8_sse4;
    }
}
Loading…
Reference in new issue