mirror of https://github.com/FFmpeg/FFmpeg.git
Modeled from the prores version. Clips to [0;1023] and is bitexact. Bitexactness requires adding offsets in different places compared to prores or C, and makes the function approximately 2% slower. For 16 frames of a DNxHD 4:2:2 10-bit test sequence: C: 60861 decicycles in idct, 1048205 runs, 371 skips; sse2: 27567 decicycles in idct, 1048216 runs, 360 skips; avx: 26272 decicycles in idct, 1048171 runs, 405 skips. The add version is not implemented, so the corresponding dsp function is set to NULL to make this obvious to any code that tries to execute it. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> pull/154/head
parent
e652f69b35
commit
4369b9dc7b
6 changed files with 136 additions and 33 deletions
@ -0,0 +1,58 @@ |
||||
;****************************************************************************** |
||||
;* x86-SIMD-optimized 10-bit simple IDCT (modeled on the prores version) |
||||
;* this is identical to "simple" IDCT written by Michael Niedermayer |
||||
;* except for the clip range |
||||
;* |
||||
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> |
||||
;* Copyright (c) 2015 Christophe Gisquet |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
%if ARCH_X86_64 |
||||
|
||||
; Read-only data used by the IDCT below.
SECTION_RODATA |
||||
|
||||
; pw_16 and pw_1023 are declared external: they are defined in another object.
; pw_16 is not referenced by name in this file.
; NOTE(review): pw_16 is presumably consumed by the included template -- confirm.
cextern pw_16 |
||||
; pw_1023 is passed to IDCT_FN by simple_idct10_put below.
cextern pw_1023 |
||||
; Four dwords, each 1<<11 (2048).
; NOTE(review): presumably the rounding term added before a >>12 shift -- confirm in the template.
pd_round_12: times 4 dd 1<<(12-1) |
||||
; Four dwords, each 1<<18 (262144).
; NOTE(review): presumably the rounding term added before a >>19 shift -- confirm in the template.
pd_round_19: times 4 dd 1<<(19-1) |
||||
|
||||
%include "libavcodec/x86/simple_idct10_template.asm" |
||||
|
||||
section .text align=16 |
||||
|
||||
; idct_fn: takes no macro parameters and emits two entry points,
; simple_idct10 and simple_idct10_put. No "add" variant is defined here.
; NOTE(review): the three numbers after each cglobal name are presumably
; x86inc's (argument count, GPR count, XMM register count) -- confirm against x86inc.asm.
; NOTE(review): IDCT_FN is not defined in this file; presumably it comes from
; simple_idct10_template.asm -- confirm.
%macro idct_fn 0 |
||||
; One-argument variant; no clip constant is passed to IDCT_FN.
cglobal simple_idct10, 1, 1, 16 |
||||
; 12 and 19 match the suffixes of pd_round_12 / pd_round_19.
; NOTE(review): presumably the two pass shifts; the empty strings are
; presumably unused rounding/offset operands -- confirm in the template.
IDCT_FN "", 12, "", 19 |
||||
RET |
||||
|
||||
; Three-argument variant; same shifts as above.
cglobal simple_idct10_put, 3, 3, 16 |
||||
; Two extra operands: 0 and pw_1023.
; NOTE(review): presumably the lower/upper clip bounds of the stored output
; (i.e. a [0;1023] clip) -- confirm in the template.
IDCT_FN "", 12, "", 19, 0, pw_1023 |
||||
RET |
||||
%endmacro |
||||
|
||||
; Expand idct_fn once after INIT_XMM sse2 ...
; NOTE(review): INIT_XMM presumably selects the instruction set and the
; function-name suffix used by cglobal -- confirm against x86inc.asm.
INIT_XMM sse2 |
||||
idct_fn |
||||
; ... and a second time after INIT_XMM avx, only when HAVE_AVX_EXTERNAL is non-zero.
%if HAVE_AVX_EXTERNAL |
||||
INIT_XMM avx |
||||
idct_fn |
||||
%endif |
||||
|
||||
%endif |
Loading…
Reference in new issue