mirror of https://github.com/FFmpeg/FFmpeg.git
Performance improvements: quant_bands: with: 681 decicycles in quant_bands, 8388453 runs, 155 skips without: 1190 decicycles in quant_bands, 8388386 runs, 222 skips Around 42% for the function Twoloop coder: abs_pow34: with/without: 7.82s/8.17s Around 4% for the entire encoder Both: with/without: 7.15s/8.17s Around 12% for the entire encoder Fast coder: abs_pow34: with/without: 3.40s/3.77s Around 10% for the entire encoder Both: with/without: 3.02s/3.77s Around 20% faster for the entire encoder Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com> Tested-by: Michael Niedermayer <michael@niedermayer.cc> Reviewed-by: James Almer <jamrial@gmail.com>pull/238/head
parent
3b02f6dd7b
commit
d2ae5f77c6
13 changed files with 170 additions and 26 deletions
@ -0,0 +1,86 @@ |
||||
;****************************************************************************** |
||||
;* SIMD optimized AAC encoder DSP functions |
||||
;* |
||||
;* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com> |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA |
||||
|
||||
float_abs_mask: times 4 dd 0x7fffffff |
||||
|
||||
SECTION .text |
||||
|
||||
;******************************************************************* |
||||
;void ff_abs_pow34(float *out, const float *in, const int size); |
||||
;******************************************************************* |
||||
INIT_XMM sse |
||||
cglobal abs_pow34, 3, 3, 3, out, in, size |
||||
mova m2, [float_abs_mask] |
||||
shl sizeq, 2 |
||||
add inq, sizeq |
||||
add outq, sizeq |
||||
neg sizeq |
||||
.loop: |
||||
andps m0, m2, [inq+sizeq] |
||||
sqrtps m1, m0 |
||||
mulps m0, m1 |
||||
sqrtps m0, m0 |
||||
mova [outq+sizeq], m0 |
||||
add sizeq, mmsize |
||||
jl .loop |
||||
RET |
||||
|
||||
;******************************************************************* |
||||
;void ff_aac_quantize_bands(int *out, const float *in, const float *scaled, |
||||
; int size, int is_signed, int maxval, const float Q34, |
||||
; const float rounding) |
||||
;******************************************************************* |
||||
INIT_XMM sse2 |
||||
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding |
||||
%if UNIX64 == 0 |
||||
movss m0, Q34m |
||||
movss m1, roundingm |
||||
cvtsi2ss m3, dword maxvalm |
||||
%else |
||||
cvtsi2ss m3, maxvald |
||||
%endif |
||||
shufps m0, m0, 0 |
||||
shufps m1, m1, 0 |
||||
shufps m3, m3, 0 |
||||
shl is_signedd, 31 |
||||
movd m4, is_signedd |
||||
shufps m4, m4, 0 |
||||
shl sized, 2 |
||||
add inq, sizeq |
||||
add outq, sizeq |
||||
add scaledq, sizeq |
||||
neg sizeq |
||||
.loop: |
||||
mulps m2, m0, [scaledq+sizeq] |
||||
addps m2, m1 |
||||
minps m2, m3 |
||||
andps m5, m4, [inq+sizeq] |
||||
orps m2, m5 |
||||
cvttps2dq m2, m2 |
||||
mova [outq+sizeq], m2 |
||||
add sizeq, mmsize |
||||
jl .loop |
||||
RET |
@ -0,0 +1,43 @@ |
||||
/*
|
||||
* AAC encoder assembly optimizations |
||||
* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "libavutil/float_dsp.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/aacenc.h" |
||||
|
||||
void ff_abs_pow34_sse(float *out, const float *in, const int size); |
||||
|
||||
void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled, |
||||
int size, int is_signed, int maxval, const float Q34, |
||||
const float rounding); |
||||
|
||||
av_cold void ff_aac_dsp_init_x86(AACEncContext *s) |
||||
{ |
||||
int cpu_flags = av_get_cpu_flags(); |
||||
|
||||
if (EXTERNAL_SSE(cpu_flags)) |
||||
s->abs_pow34 = ff_abs_pow34_sse; |
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) |
||||
s->quant_bands = ff_aac_quantize_bands_sse2; |
||||
} |
Loading…
Reference in new issue