When decoding coefficients, detect whether the block is DC-only, and use that knowledge to perform a DC-only inverse transform. This is achieved by: (1) changing the 108x4-element modulo_three_table into a 108-element table (a kind of base-4 packing) and accessing each value using masks and shifts; (2) checking whether the low bits are 0, as they represent the presence of higher-frequency coefficients. Also provide x86 SIMD code for the DC-only inverse transform. Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com> pull/3/head
parent
b2ce3b998b
commit
3faa303a47
7 changed files with 200 additions and 66 deletions
@ -0,0 +1,55 @@ |
||||
;****************************************************************************** |
||||
;* MMX/SSE2-optimized functions for the RV30 and RV40 decoders |
||||
;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com> |
||||
;* |
||||
;* This file is part of Libav. |
||||
;* |
||||
;* Libav is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* Libav is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with Libav; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "x86inc.asm" |
||||
%include "x86util.asm" |
||||
|
||||
SECTION .text |
||||
|
||||
; Scale factor and shift used by the non-rounding DC-only transform.
%define RV34_DC_NOROUND_SCALE (13*13*3)
%define RV34_DC_NOROUND_SHIFT 11

;------------------------------------------------------------------------------
; IDCT_DC_NOROUND reg
;   In place: reg = (reg * 13*13*3) >> 11, using an arithmetic (sign-preserving)
;   shift and no rounding bias.
;   %1 = general-purpose register holding the sign-extended DC coefficient.
;   Clobbers: flags.
;------------------------------------------------------------------------------
%macro IDCT_DC_NOROUND 1
    imul    %1, %1, RV34_DC_NOROUND_SCALE
    sar     %1, RV34_DC_NOROUND_SHIFT
%endmacro
||||
|
||||
; Scale factor, rounding bias and shift used by the rounding DC-only transform.
; The bias 0x200 is half of (1 << 10), i.e. round-to-nearest before the shift.
%define RV34_DC_ROUND_SCALE (13*13)
%define RV34_DC_ROUND_BIAS  0x200
%define RV34_DC_ROUND_SHIFT 10

;------------------------------------------------------------------------------
; IDCT_DC_ROUND reg
;   In place: reg = (reg * 13*13 + 0x200) >> 10, using an arithmetic
;   (sign-preserving) shift.
;   %1 = general-purpose register holding the sign-extended DC coefficient.
;   Clobbers: flags.
;------------------------------------------------------------------------------
%macro IDCT_DC_ROUND 1
    imul    %1, %1, RV34_DC_ROUND_SCALE
    add     %1, RV34_DC_ROUND_BIAS
    sar     %1, RV34_DC_ROUND_SHIFT
%endmacro
||||
|
||||
;------------------------------------------------------------------------------
; rv34_idct_dequant4x4_dc suffix
;   Emits the function rv34_idct_dequant4x4_<suffix>_mmx2 (declared through
;   cglobal with 1 argument, 2 general-purpose registers, 0 XMM registers).
;
;   C-side view: void f(DCTELEM *block)
;   In:    r0 = block pointer
;   Out:   the scaled DC value is written as four 16-bit words at each of the
;          byte offsets 0, 16, 32 and 48 from block
;          (NOTE(review): presumably the four rows of a 4x4 block stored with
;          a 16-byte row stride -- confirm against the C reference).
;   Clobb: r1, mm0, flags
;
;   IDCT_DC must be %define'd to the desired scaling macro (IDCT_DC_ROUND or
;   IDCT_DC_NOROUND) before this macro is invoked.
;   No emms is issued here; NOTE(review): presumably the caller is responsible
;   for restoring the x87 state -- confirm.
;------------------------------------------------------------------------------
%macro rv34_idct_dequant4x4_dc 1
cglobal rv34_idct_dequant4x4_%1_mmx2, 1, 2, 0
    movsx   r1, word [r0]           ; r1 = sign-extended 16-bit DC coefficient
    IDCT_DC r1                      ; scale (and optionally round) the DC value
    ; Only the low word is consumed by pshufw below, so feed movd the 32-bit
    ; view of the register: this is the canonical movd form on every target
    ; and avoids depending on 64-bit-GPR movd support in the assembler.
    movd    mm0, r1d
    pshufw  mm0, mm0, 0             ; broadcast word 0 to all four words
    movq    [r0+ 0], mm0
    movq    [r0+16], mm0
    movq    [r0+32], mm0
    movq    [r0+48], mm0
    REP_RET
%endmacro
||||
|
||||
; Instantiate both DC-only variants for MMX registers.
; IDCT_DC selects which scaling macro the function template expands.
INIT_MMX

; Rounding variant: rv34_idct_dequant4x4_dc_mmx2
%define IDCT_DC IDCT_DC_ROUND
rv34_idct_dequant4x4_dc dc

; Non-rounding variant: rv34_idct_dequant4x4_dc_noround_mmx2
%define IDCT_DC IDCT_DC_NOROUND
rv34_idct_dequant4x4_dc dc_noround
@ -0,0 +1,40 @@ |
||||
/*
|
||||
* RV30/40 MMX/SSE2 optimizations |
||||
* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86_cpu.h" |
||||
#include "libavcodec/dsputil.h" |
||||
#include "libavcodec/rv34dsp.h" |
||||
|
||||
/* DC-only inverse transforms written in assembly; each takes the coefficient
 * block and rewrites it in place.
 * NOTE(review): the ff_ prefix is presumably added by the assembler-side
 * cglobal macro -- confirm against x86inc.asm. */
void ff_rv34_idct_dequant4x4_dc_mmx2(DCTELEM *block);         /* rounding variant     */
void ff_rv34_idct_dequant4x4_dc_noround_mmx2(DCTELEM *block); /* non-rounding variant */
||||
|
||||
/**
 * Install x86-optimized routines into an RV34 DSP context.
 *
 * When built with HAVE_YASM and the running CPU reports AV_CPU_FLAG_MMX2,
 * both entries of rv34_inv_transform_dc_tab are replaced with the MMX2
 * DC-only inverse transforms. Otherwise the context is left untouched.
 *
 * @param c   RV34 DSP context whose function table is updated
 * @param dsp generic DSP context; not used by this function
 */
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

    /* Nothing to install unless the CPU supports the MMX2 extensions. */
    if (!(cpu_flags & AV_CPU_FLAG_MMX2))
        return;

    /* Slot 0: rounding variant; slot 1: non-rounding variant. */
    c->rv34_inv_transform_dc_tab[0] = ff_rv34_idct_dequant4x4_dc_mmx2;
    c->rv34_inv_transform_dc_tab[1] = ff_rv34_idct_dequant4x4_dc_noround_mmx2;
#endif
}
Loading…
Reference in new issue