mirror of https://github.com/FFmpeg/FFmpeg.git
Specifically for the yuv444, yuv422 and yuv420 formats, when the main stream has no alpha and alpha is straight.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
pull/298/head
parent
a150b2e3a0
commit
6d7c63588c
5 changed files with 313 additions and 56 deletions
@ -0,0 +1,85 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVFILTER_OVERLAY_H |
||||
#define AVFILTER_OVERLAY_H |
||||
|
||||
#include "libavutil/eval.h" |
||||
#include "libavutil/pixdesc.h" |
||||
#include "framesync.h" |
||||
#include "avfilter.h" |
||||
|
||||
/**
 * Names of the expression variables of the overlay filter.
 *
 * VAR_VARS_NB sizes OverlayContext.var_values, so the other enumerators are
 * presumably used as indices into that array. Several variables come as a
 * long/short pair (e.g. VAR_MAIN_W / VAR_MW); each name has its own distinct
 * value, so the order below must not change.
 */
enum var_name {
    VAR_MAIN_W,    VAR_MW,
    VAR_MAIN_H,    VAR_MH,
    VAR_OVERLAY_W, VAR_OW,
    VAR_OVERLAY_H, VAR_OH,
    VAR_HSUB,
    VAR_VSUB,
    VAR_X,
    VAR_Y,
    VAR_N,
    VAR_POS,
    VAR_T,
    VAR_VARS_NB    ///< number of enumerators above; keep last
};
||||
|
||||
/**
 * Values stored in OverlayContext.format.
 *
 * The numeric values follow declaration order, so new formats must be added
 * immediately before OVERLAY_FORMAT_NB.
 */
enum OverlayFormat {
    OVERLAY_FORMAT_YUV420,
    OVERLAY_FORMAT_YUV422,
    OVERLAY_FORMAT_YUV444,
    OVERLAY_FORMAT_RGB,
    OVERLAY_FORMAT_GBRP,
    OVERLAY_FORMAT_AUTO,
    OVERLAY_FORMAT_NB      ///< number of enumerators above; keep last
};
||||
|
||||
typedef struct OverlayContext { |
||||
const AVClass *class; |
||||
int x, y; ///< position of overlaid picture
|
||||
|
||||
uint8_t main_is_packed_rgb; |
||||
uint8_t main_rgba_map[4]; |
||||
uint8_t main_has_alpha; |
||||
uint8_t overlay_is_packed_rgb; |
||||
uint8_t overlay_rgba_map[4]; |
||||
uint8_t overlay_has_alpha; |
||||
int format; ///< OverlayFormat
|
||||
int alpha_format; |
||||
int eval_mode; ///< EvalMode
|
||||
|
||||
FFFrameSync fs; |
||||
|
||||
int main_pix_step[4]; ///< steps per pixel for each plane of the main output
|
||||
int overlay_pix_step[4]; ///< steps per pixel for each plane of the overlay
|
||||
int hsub, vsub; ///< chroma subsampling values
|
||||
const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
|
||||
|
||||
double var_values[VAR_VARS_NB]; |
||||
char *x_expr, *y_expr; |
||||
|
||||
AVExpr *x_pexpr, *y_pexpr; |
||||
|
||||
int (*blend_row[4])(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w, |
||||
ptrdiff_t alinesize); |
||||
int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); |
||||
} OverlayContext; |
||||
|
||||
/**
 * Install x86 SIMD row-blending functions into s->blend_row[] when the CPU
 * and the requested configuration allow it; otherwise leave s untouched.
 *
 * @param s               overlay context whose blend_row[0..2] may be set
 * @param format          an OverlayFormat value
 * @param alpha_format    alpha mode; SIMD is only selected when this is 0
 * @param main_has_alpha  nonzero if the main input has an alpha plane; SIMD
 *                        is only selected when this is 0
 */
void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha);
||||
|
||||
#endif /* AVFILTER_OVERLAY_H */ |
@ -0,0 +1,144 @@ |
||||
;***************************************************************************** |
||||
;* x86-optimized functions for overlay filter |
||||
;* |
||||
;* Copyright (C) 2018 Paul B Mahol |
||||
;* Copyright (C) 2018 Henrik Gramner |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;***************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA

; pb_1:   pmaddubsw multiplier used by overlay_row_20 to sum adjacent alpha bytes
; pw_128: term added before the multiply by 257 in every blend
; pw_255: xor mask turning alpha into 255 - alpha; also the low-byte mask
;         passed to pandn in overlay_row_22
; pw_257: pmulhuw multiplier; together with pw_128 it computes
;         ((v + 128) * 257) >> 16
pb_1:   times 16 db 1
pw_128: times  8 dw 128
pw_255: times  8 dw 255
pw_257: times  8 dw 257
||||
|
||||
SECTION .text |
||||
|
||||
;------------------------------------------------------------------------------
; int ff_overlay_row_44_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
;                            int w, ptrdiff_t alinesize)
;
; Blend one row using one alpha byte per pixel, 8 pixels per iteration:
;     d[x] = ((s[x] * a[x] + d[x] * (255 - a[x]) + 128) * 257) >> 16
;
; da and alinesize are not read. r and x are scratch registers
; (r = w mod 8, x = current pixel offset).
;
; Returns in eax the number of pixels written: 0 when w < 8, otherwise w
; rounded down to a multiple of 8. Presumably the caller finishes the
; remaining pixels itself.
;------------------------------------------------------------------------------
INIT_XMM sse4
cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x
    xor          xq, xq
    movsxdifnidn wq, wd
    mov          rq, wq
    and          rq, mmsize/2 - 1       ; r = w mod 8
    cmp          wq, mmsize/2
    jl .end                             ; fewer than 8 pixels: nothing to do
    sub          wq, rq                 ; w = largest multiple of 8 <= w
    mova         m3, [pw_255]
    mova         m4, [pw_128]
    mova         m5, [pw_257]
    .loop:
        pmovzxbw    m0, [sq+xq]         ; m0 = s[x..x+7] as words
        pmovzxbw    m2, [aq+xq]         ; m2 = a[x..x+7] as words
        pmovzxbw    m1, [dq+xq]         ; m1 = d[x..x+7] as words
        pmullw      m0, m2              ; s * a
        pxor        m2, m3              ; a ^ 255 == 255 - a
        pmullw      m1, m2              ; d * (255 - a)
        paddw       m0, m4              ; + 128
        paddw       m0, m1
        pmulhuw     m0, m5              ; (v * 257) >> 16
        packuswb    m0, m0
        movq   [dq+xq], m0              ; store 8 blended bytes
        add         xq, mmsize/2
        cmp         xq, wq
        jl .loop

    .end:
    mov    eax, xd                      ; return number of pixels processed
    RET
||||
|
||||
;------------------------------------------------------------------------------
; int ff_overlay_row_22_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
;                            int w, ptrdiff_t alinesize)
;
; Blend one row whose alpha has two bytes per output pixel, 8 pixels per
; iteration. The alpha for pixel x is built from a[2x] and a[2x+1] with two
; pavgw steps, avg(avg(a[2x+1], a[2x]), a[2x]), i.e. roughly
; (3 * a[2x] + a[2x+1]) / 4. The blend itself is
;     d[x] = ((s[x] * alpha + d[x] * (255 - alpha) + 128) * 257) >> 16
;
; da and alinesize are not read. r and x are scratch registers.
;
; With w' = w - 1, returns in eax 0 when w' < 8, otherwise w' rounded down to
; a multiple of 8.
; NOTE(review): the "- 1" presumably keeps the 16-byte alpha load inside the
; row and leaves the last pixel to the caller -- confirm against the caller.
;------------------------------------------------------------------------------
INIT_XMM sse4
cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x
    xor          xq, xq
    movsxdifnidn wq, wd
    sub          wq, 1
    mov          rq, wq
    and          rq, mmsize/2 - 1       ; r = (w - 1) mod 8
    cmp          wq, mmsize/2
    jl .end                             ; fewer than 8 pixels: nothing to do
    sub          wq, rq                 ; round down to a multiple of 8
    mova         m3, [pw_255]
    mova         m4, [pw_128]
    mova         m5, [pw_257]
    .loop:
        pmovzxbw    m0, [sq+xq]         ; m0 = s[x..x+7] as words
        movu        m1, [aq+2*xq]       ; 16 alpha bytes, one pair per word
        pandn       m2, m3, m1          ; ~0x00ff & m1: a[2x+1] << 8
        psllw       m1, 8               ; a[2x] << 8
        pavgw       m2, m1              ; avg(a[2x+1], a[2x]) << 8
        pavgw       m2, m1              ; averaged once more with a[2x]
        psrlw       m2, 8               ; back to the 0..255 range
        pmovzxbw    m1, [dq+xq]         ; m1 = d[x..x+7] as words
        pmullw      m0, m2              ; s * alpha
        pxor        m2, m3              ; alpha ^ 255 == 255 - alpha
        pmullw      m1, m2              ; d * (255 - alpha)
        paddw       m0, m4              ; + 128
        paddw       m0, m1
        pmulhuw     m0, m5              ; (v * 257) >> 16
        packuswb    m0, m0
        movq   [dq+xq], m0              ; store 8 blended bytes
        add         xq, mmsize/2
        cmp         xq, wq
        jl .loop

    .end:
    mov    eax, xd                      ; return number of pixels processed
    RET
||||
|
||||
;------------------------------------------------------------------------------
; int ff_overlay_row_20_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
;                            int w, ptrdiff_t alinesize)
;
; Blend one row whose alpha has a 2x2 block per output pixel, 8 pixels per
; iteration. The alpha for pixel x is
;     (a[2x] + a[2x+1] + a[alinesize + 2x] + a[alinesize + 2x+1]) >> 2
; and the blend is
;     d[x] = ((s[x] * alpha + d[x] * (255 - alpha) + 128) * 257) >> 16
;
; The incoming da value is discarded: da is overwritten with a + alinesize.
; After that r is reused as scratch; x is the current pixel offset.
;
; With w' = w - 1, returns in eax 0 when w' < 8, otherwise w' rounded down to
; a multiple of 8.
; NOTE(review): the "- 1" presumably keeps the 16-byte alpha loads inside the
; row and leaves the last pixel to the caller -- confirm against the caller.
;------------------------------------------------------------------------------
INIT_XMM sse4
cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x
    mov         daq, aq
    add         daq, rmp                ; da = a + alinesize (second alpha row)
    xor          xq, xq
    movsxdifnidn wq, wd
    sub          wq, 1
    mov          rq, wq
    and          rq, mmsize/2 - 1       ; r = (w - 1) mod 8
    cmp          wq, mmsize/2
    jl .end                             ; fewer than 8 pixels: nothing to do
    sub          wq, rq                 ; round down to a multiple of 8
    mova         m3, [pw_255]
    mova         m4, [pw_128]
    mova         m5, [pw_257]
    mova         m6, [pb_1]
    .loop:
        pmovzxbw    m0, [sq+xq]         ; m0 = s[x..x+7] as words
        movu        m2, [aq+2*xq]       ; 16 alpha bytes, first row
        movu        m1, [daq+2*xq]      ; 16 alpha bytes, second row
        pmaddubsw   m2, m6              ; a[2x] + a[2x+1] per word
        pmaddubsw   m1, m6              ; same for the second row
        paddw       m2, m1              ; sum of the 2x2 block
        psrlw       m2, 2               ; / 4
        pmovzxbw    m1, [dq+xq]         ; m1 = d[x..x+7] as words
        pmullw      m0, m2              ; s * alpha
        pxor        m2, m3              ; alpha ^ 255 == 255 - alpha
        pmullw      m1, m2              ; d * (255 - alpha)
        paddw       m0, m4              ; + 128
        paddw       m0, m1
        pmulhuw     m0, m5              ; (v * 257) >> 16
        packuswb    m0, m0
        movq   [dq+xq], m0              ; store 8 blended bytes
        add         xq, mmsize/2
        cmp         xq, wq
        jl .loop

    .end:
    mov    eax, xd                      ; return number of pixels processed
    RET
@ -0,0 +1,63 @@ |
||||
/*
|
||||
* Copyright (c) 2018 Paul B Mahol |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavfilter/vf_overlay.h" |
||||
|
||||
/*
 * SSE4 row blenders implemented in assembly. All three share one signature
 * so that they fit OverlayContext.blend_row[]:
 *
 *   d          row that is blended into (read and written)
 *   da         only used as a scratch register by the _20 variant
 *   s          source row
 *   a          alpha row
 *   w          row width in pixels
 *   alinesize  byte offset from one alpha row to the next (_20 variant only)
 *
 * Each returns the number of pixels it processed, which can be less than w.
 */
int ff_overlay_row_44_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
                           int w, ptrdiff_t alinesize);

int ff_overlay_row_20_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
                           int w, ptrdiff_t alinesize);

int ff_overlay_row_22_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
                           int w, ptrdiff_t alinesize);
||||
|
||||
/**
 * Select SSE4 row blenders for the planar formats that have one.
 *
 * Nothing is changed unless the CPU reports SSE4, alpha_format is 0 and the
 * main input has no alpha. Per the commit description the supported case is
 * "straight" alpha, so 0 is presumably the straight alpha mode.
 *
 * @param s               context whose blend_row[0..2] are set on success
 * @param format          an OverlayFormat value
 * @param alpha_format    alpha mode; must be 0 for SIMD to be used
 * @param main_has_alpha  must be 0 for SIMD to be used
 */
av_cold void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha)
{
    int cpu_flags = av_get_cpu_flags();

    /* Preconditions shared by every SIMD path below. */
    if (!EXTERNAL_SSE4(cpu_flags) || alpha_format != 0 || main_has_alpha != 0)
        return;

    switch (format) {
    case OVERLAY_FORMAT_YUV444:
    case OVERLAY_FORMAT_GBRP:
        /* All three planes use the one-alpha-byte-per-pixel blender. */
        s->blend_row[0] = ff_overlay_row_44_sse4;
        s->blend_row[1] = ff_overlay_row_44_sse4;
        s->blend_row[2] = ff_overlay_row_44_sse4;
        break;
    case OVERLAY_FORMAT_YUV420:
        /* Planes 1 and 2 average a 2x2 block of alpha per pixel. */
        s->blend_row[0] = ff_overlay_row_44_sse4;
        s->blend_row[1] = ff_overlay_row_20_sse4;
        s->blend_row[2] = ff_overlay_row_20_sse4;
        break;
    case OVERLAY_FORMAT_YUV422:
        /* Planes 1 and 2 combine two horizontally adjacent alpha bytes. */
        s->blend_row[0] = ff_overlay_row_44_sse4;
        s->blend_row[1] = ff_overlay_row_22_sse4;
        s->blend_row[2] = ff_overlay_row_22_sse4;
        break;
    default:
        /* No SIMD implementation for the remaining formats. */
        break;
    }
}
Loading…
Reference in new issue