mirror of https://github.com/FFmpeg/FFmpeg.git
integration by Neil Birkbeck, with help from Vitor Sessak. core SSE2 loop by Skal (pascal.massimino@gmail.com) Reviewed-by: Clément Bœsch <u@pkh.me> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>pull/88/head
parent
53b0892005
commit
406a9ccffe
6 changed files with 251 additions and 32 deletions
@ -0,0 +1,58 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License along |
||||
* with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVFILTER_IDET_H |
||||
#define AVFILTER_IDET_H |
||||
|
||||
#include "libavutil/pixdesc.h" |
||||
#include "avfilter.h" |
||||
|
||||
/* Number of entries in IDETContext.history. */
#define HIST_SIZE 4
||||
|
||||
/**
 * Classification label; this is the type of IDETContext.last_type.
 *
 * NOTE(review): TFF / BFF presumably mean top-field-first and
 * bottom-field-first -- confirm against the code that assigns them.
 * PROGRSSIVE is a misspelling of "progressive"; the identifier is kept
 * unchanged because code outside this header may refer to it by this name.
 */
typedef enum {
    TFF,
    BFF,
    PROGRSSIVE,
    UNDETERMINED,
} Type;
||||
|
||||
typedef struct { |
||||
const AVClass *class; |
||||
float interlace_threshold; |
||||
float progressive_threshold; |
||||
|
||||
Type last_type; |
||||
int prestat[4]; |
||||
int poststat[4]; |
||||
|
||||
uint8_t history[HIST_SIZE]; |
||||
|
||||
AVFrame *cur; |
||||
AVFrame *next; |
||||
AVFrame *prev; |
||||
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w); |
||||
|
||||
const AVPixFmtDescriptor *csp; |
||||
} IDETContext; |
||||
|
||||
/**
 * Install an x86 SIMD implementation of IDETContext.filter_line.
 *
 * The choice is made from the CPU flags at run time; when no supported
 * extension is available the existing pointer is left untouched.
 *
 * @param idet context whose filter_line pointer may be overwritten
 */
void ff_idet_init_x86(IDETContext *idet);
||||
|
||||
/**
 * Fall-back line filter, used by the x86 wrappers for the left-over bytes
 * at the end of a line that do not fill a whole SIMD span.
 *
 * NOTE(review): the definition is not in this header; a, b and c presumably
 * correspond to the prev/cur/next lines of IDETContext.filter_line -- confirm.
 *
 * @param a first input line
 * @param b second input line
 * @param c third input line
 * @param w number of bytes to process on each line
 * @return the accumulated score for the w bytes
 */
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w);
||||
|
||||
#endif |
@ -0,0 +1,114 @@ |
||||
; ***************************************************************************** |
||||
; * x86-optimized functions for idet filter |
||||
; * |
||||
; * This file is part of FFmpeg. |
||||
; * |
||||
; * FFmpeg is free software; you can redistribute it and/or modify |
||||
; * it under the terms of the GNU General Public License as published by |
||||
; * the Free Software Foundation; either version 2 of the License, or |
||||
; * (at your option) any later version. |
||||
; * |
||||
; * FFmpeg is distributed in the hope that it will be useful, |
||||
; * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
; * GNU General Public License for more details. |
||||
; * |
||||
; * You should have received a copy of the GNU General Public License along |
||||
; * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
||||
; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||
; ****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
%if ARCH_X86_32

; Implementation that does 8-bytes at a time using single-word operations.
;
; int idet_filter_line(const uint8_t *a, const uint8_t *b,
;                      const uint8_t *c, int width)
;
; Returns the sum over the line of |a[i] + c[i] - 2*b[i]|.
; The loop body runs before the width test and advances 8 bytes per pass,
; so the caller must pass a positive width that is a multiple of 8 (the C
; wrappers generated by FUNC_MAIN_DECL guarantee this).
; %1 is the cpu flavour handed to INIT_MMX (mmx or mmxext).
%macro IDET_FILTER_LINE 1
INIT_MMX %1
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
    xor       indexq, indexq
%define   m_zero m2
%define   m_sum  m5
    pxor      m_sum, m_sum
    pxor      m_zero, m_zero

.loop:
    ; widen 8 bytes of a to two registers of 16-bit words (m1 = high, m0 = low)
    movu      m0, [aq + indexq*1]
    punpckhbw m1, m0, m_zero
    punpcklbw m0, m_zero

    ; same for c, then a + c
    movu      m3, [cq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    paddsw    m1, m4
    paddsw    m0, m3

    ; widen b, double it, and form a + c - 2*b
    movu      m3, [bq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    paddw     m4, m4
    paddw     m3, m3
    psubsw    m1, m4
    psubsw    m0, m3

    ; absolute value of both word vectors (x86util macro; m4/m3 are scratch)
    ABS2      m1, m0, m4, m3

    ; fold the words into dwords and accumulate
    paddw     m0, m1
    punpckhwd m1, m0, m_zero
    punpcklwd m0, m_zero

    paddd     m0, m1
    paddd     m_sum, m0

    add       indexq, 0x8
    CMP       widthd, indexd
    jg        .loop

    ; horizontal add of the two dword lanes; the result is the low dword.
    ; paddd is used rather than paddq: paddq on MMX registers is an SSE2
    ; instruction and would fault on the MMX/MMXEXT-only CPUs this code is for.
    mova      m0, m_sum
    psrlq     m_sum, 0x20
    paddd     m0, m_sum
    movd      eax, m0
    RET
%endmacro

IDET_FILTER_LINE mmxext
IDET_FILTER_LINE mmx
%endif
||||
|
||||
; SSE2 8-bit implementation that does 16-bytes at a time:
;
; int idet_filter_line(const uint8_t *a, const uint8_t *b,
;                      const uint8_t *c, int width)
;
; Returns the sum over the line of |a[i] + c[i] - 2*b[i]| without widening:
; with saturating byte differences ab = a-b, ba = b-a, bc = b-c, cb = c-b,
; the value equals |ab - bc| + |ba - cb|, and psadbw sums those directly.
; The loop body runs before the width test and advances 16 bytes per pass,
; so the caller must pass a positive width that is a multiple of 16 (the C
; wrapper generated by FUNC_MAIN_DECL guarantees this).
INIT_XMM sse2
cglobal idet_filter_line, 4, 5, 7, a, b, c, width, index
    xor       indexq, indexq
    pxor      m0, m0
    pxor      m1, m1

.sse2_loop:
    movu      m2, [bq + indexq*1]  ; B
    movu      m3, [aq + indexq*1]  ; A
    mova      m6, m2
    mova      m4, m3
    psubusb   m5, m2, m3           ; ba

    movu      m3, [cq + indexq*1]  ; C
    add       indexq, 0x10
    psubusb   m4, m2               ; ab
    ; flags are set here and consumed by the jl below; the SIMD
    ; instructions in between do not modify EFLAGS
    CMP       indexd, widthd

    psubusb   m6, m3               ; bc
    psubusb   m3, m2               ; cb

    psadbw    m4, m6               ; |ab - bc|
    paddq     m0, m4
    psadbw    m5, m3               ; |ba - cb|
    paddq     m1, m5
    jl       .sse2_loop

    ; merge both accumulators, then add the high qword onto the low one
    paddq     m0, m1
    movhlps   m1, m0
    paddq     m0, m1
    movd      eax, m0
    RET
@ -0,0 +1,70 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License along |
||||
* with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/mem.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavfilter/vf_idet.h" |
||||
|
||||
/*
 * Declares the external routine ff_idet_filter_line_<KIND>() and defines the
 * main callable idet_filter_line_{mmx,mmxext,sse2}() wrapper around it.
 *
 * SPAN is the number of bytes the external routine consumes per iteration;
 * it must be a power of two because the remainder is taken with a bit mask.
 * The wrapper hands the largest multiple of SPAN to the external routine
 * and the left-over bytes (if any) to ff_idet_filter_line_c(), and returns
 * the sum of both results.
 */
#define FUNC_MAIN_DECL(KIND, SPAN)                                        \
int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,        \
                               const uint8_t *c, int w);                  \
static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,    \
                                   const uint8_t *c, int w) {             \
    int sum = 0;                                                          \
    const int left_over = w & ((SPAN) - 1);                               \
    w -= left_over;                                                       \
    if (w > 0)                                                            \
        sum += ff_idet_filter_line_##KIND(a, b, c, w);                    \
    if (left_over > 0)                                                    \
        sum += ff_idet_filter_line_c(a + w, b + w, c + w, left_over);     \
    return sum;                                                           \
}
||||
|
||||
#if HAVE_YASM

/* One wrapper per external routine; the second argument is the number of
 * bytes that routine handles per iteration. */
FUNC_MAIN_DECL(sse2, 16)
#if ARCH_X86_32
FUNC_MAIN_DECL(mmx, 8)
FUNC_MAIN_DECL(mmxext, 8)
#endif /* ARCH_X86_32 */

#endif /* HAVE_YASM */
||||
|
||||
/**
 * Point idet->filter_line at an x86 SIMD wrapper chosen from the CPU flags.
 *
 * The checks run in the order MMX, MMXEXT (both only on 32-bit x86) and
 * then SSE2, and each match overwrites the previous one, so the last
 * matching check wins. Without HAVE_YASM, or when no check
 * matches, idet->filter_line is left as the caller set it.
 *
 * @param idet context whose filter_line pointer may be overwritten
 */
av_cold void ff_idet_init_x86(IDETContext *idet)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        idet->filter_line = idet_filter_line_mmx;
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        idet->filter_line = idet_filter_line_mmxext;
    }
#endif // ARCH_X86_32

    if (EXTERNAL_SSE2(cpu_flags)) {
        idet->filter_line = idet_filter_line_sse2;
    }
#endif // HAVE_YASM
}
Loading…
Reference in new issue