x86: dcadsp: implement int8x8_fmul_int32

For the callable function (as opposed to the inline one):
         C  SSE  SSE2  SSE4
Win32:  47   42   29    26
Win64:  30   33   25    23
The SSE version is neither compiled nor set for ARCH_X86_64, as the
inlinable function takes over.

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
pull/43/merge
Christophe Gisquet 13 years ago committed by Janne Grunau
parent 2bd44cb705
commit 5b59a9fc61
  1. 3
      libavcodec/dcadec.c
  2. 1
      libavcodec/dcadsp.c
  3. 1
      libavcodec/dcadsp.h
  4. 2
      libavcodec/x86/Makefile
  5. 52
      libavcodec/x86/dca.h
  6. 90
      libavcodec/x86/dcadsp.asm
  7. 47
      libavcodec/x86/dcadsp_init.c

@ -50,6 +50,9 @@
#if ARCH_ARM
# include "arm/dca.h"
#endif
#if ARCH_X86
# include "x86/dca.h"
#endif
//#define TRACE

@ -88,4 +88,5 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
s->qmf_32_subbands = dca_qmf_32_subbands;
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
if (ARCH_ARM) ff_dcadsp_init_arm(s);
if (ARCH_X86) ff_dcadsp_init_x86(s);
}

@ -36,5 +36,6 @@ typedef struct DCADSPContext {
void ff_dcadsp_init(DCADSPContext *s);
void ff_dcadsp_init_arm(DCADSPContext *s);
void ff_dcadsp_init_x86(DCADSPContext *s);
#endif /* AVCODEC_DCADSP_H */

@ -4,6 +4,7 @@ OBJS += x86/constants.o \
OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o
OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o
OBJS-$(CONFIG_DCT) += x86/dct_init.o
OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \
@ -54,6 +55,7 @@ YASM-OBJS += x86/deinterlace.o \
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \
x86/fpel.o \

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#if ARCH_X86_64
# include "libavutil/x86/asm.h"
# include "libavutil/mem.h"
# define int8x8_fmul_int32 int8x8_fmul_int32
/**
 * Inline SSE2 implementation of int8x8_fmul_int32.
 *
 * Converts the 8 signed bytes at src to float, multiplies each of them by
 * (scale / 16) and stores the 8 products to dst.
 *
 * @param dsp   unused (marked av_unused)
 * @param dst   output, 8 floats; written with movaps, so it must be
 *              16-byte aligned
 * @param src   input, 8 int8_t values read with one 8-byte load
 * @param scale integer scale factor, converted to float before use
 */
static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
                                     float *dst, const int8_t *src, int scale)
{
    /* 0x3D800000 is the IEEE-754 single-precision bit pattern of 1/16. */
    DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000;
    __asm__ volatile (
        /* xmm0[0] = (float)scale * (1/16) */
        "cvtsi2ss        %2, %%xmm0 \n\t"
        "mulss           %3, %%xmm0 \n\t"
        /* Load 8 bytes, then replicate every byte four times so that it
         * ends up in the top byte of its own dword. */
        "movq          (%1), %%xmm1 \n\t"
        "punpcklbw   %%xmm1, %%xmm1 \n\t"
        "movaps      %%xmm1, %%xmm2 \n\t"
        "punpcklwd   %%xmm1, %%xmm1 \n\t"
        "punpckhwd   %%xmm2, %%xmm2 \n\t"
        /* Arithmetic shift by 24 leaves the sign-extended byte value. */
        "psrad          $24, %%xmm1 \n\t"
        "psrad          $24, %%xmm2 \n\t"
        /* Broadcast the scale factor to all four lanes. */
        "shufps  $0, %%xmm0, %%xmm0 \n\t"
        "cvtdq2ps    %%xmm1, %%xmm1 \n\t"
        "cvtdq2ps    %%xmm2, %%xmm2 \n\t"
        "mulps       %%xmm0, %%xmm1 \n\t"
        "mulps       %%xmm0, %%xmm2 \n\t"
        "movaps      %%xmm1,  0(%0) \n\t"
        "movaps      %%xmm2, 16(%0) \n\t"
        :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16)
        /* dst and src are only passed as addresses, so the compiler cannot
         * see that the asm reads src[0..7] and writes dst[0..7]; the
         * "memory" clobber keeps it from caching or reordering those
         * accesses across this statement. */
        : XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "memory"
    );
}
#endif /* ARCH_X86_64 */

@ -0,0 +1,90 @@
;******************************************************************************
;* SSE-optimized functions for the DCA decoder
;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
pf_inv16: times 4 dd 0x3D800000 ; 1/16
SECTION_TEXT
;------------------------------------------------------------------------------
; void ff_int8x8_fmul_int32_<sse|sse2|sse4>(float *dst, const int8_t *src,
;                                           int scale)
;
; dst[i] = src[i] * (scale / 16.0f) for i = 0..7.
; dst is written with mova and therefore has to be 16-byte aligned.
; The macro emits one function for the instruction set selected by the
; preceding INIT_XMM.
;------------------------------------------------------------------------------
%macro INT8X8_FMUL_INT32 0
cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
    ; m0 = (float)scale * 1/16, broadcast to all four lanes
    cvtsi2ss    m0, scalem
    mulss       m0, [pf_inv16]
    shufps      m0, m0, 0
%if cpuflag(sse2)
%if cpuflag(sse4)
    ; SSE4: sign-extend 4 bytes to 4 dwords directly
    pmovsxbd    m1, [srcq+0]
    pmovsxbd    m2, [srcq+4]
%else
    ; SSE2: replicate each byte into all 4 bytes of a dword, then shift
    ; arithmetically by 24 to get the sign-extended value
    movq        m1, [srcq]
    punpcklbw   m1, m1
    mova        m2, m1
    punpcklwd   m1, m1
    punpckhwd   m2, m2
    psrad       m1, 24
    psrad       m2, 24
%endif
    cvtdq2ps    m1, m1
    cvtdq2ps    m2, m2
%else
    ; SSE only: no integer ops on XMM registers, so do the sign extension in
    ; MMX registers (two dwords each) and convert pairwise with cvtpi2ps
    movd       mm0, [srcq+0]
    movd       mm1, [srcq+4]
    punpcklbw  mm0, mm0
    punpcklbw  mm1, mm1
    movq       mm2, mm0
    movq       mm3, mm1
    punpcklwd  mm0, mm0
    punpcklwd  mm1, mm1
    punpckhwd  mm2, mm2
    punpckhwd  mm3, mm3
    psrad      mm0, 24
    psrad      mm1, 24
    psrad      mm2, 24
    psrad      mm3, 24
    cvtpi2ps    m1, mm0
    cvtpi2ps    m2, mm1
    cvtpi2ps    m3, mm2
    cvtpi2ps    m4, mm3
    ; m0 already holds the broadcast scale and nothing above writes to it,
    ; so it does not need to be shuffled again here
    emms
    ; merge the low float pairs: m1 = src[0..3], m2 = src[4..7]
    shufps      m1, m3, q1010
    shufps      m2, m4, q1010
%endif
    mulps       m1, m0
    mulps       m2, m0
    mova [dstq+ 0], m1
    mova [dstq+16], m2
    REP_RET
%endmacro
%if ARCH_X86_32
INIT_XMM sse
INT8X8_FMUL_INT32
%endif
INIT_XMM sse2
INT8X8_FMUL_INT32
INIT_XMM sse4
INT8X8_FMUL_INT32

@ -0,0 +1,47 @@
/*
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dcadsp.h"
void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale);
void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale);
void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale);
/**
 * Install the x86 assembly versions of the DCA DSP functions.
 *
 * The checks run from the oldest to the newest instruction set, so the
 * pointer ends up at the most recent variant the running CPU supports.
 *
 * @param s DSP context whose int8x8_fmul_int32 pointer may be overridden
 */
av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
{
    int flags = av_get_cpu_flags();

#if ARCH_X86_32
    /* The plain SSE variant is only assigned on 32-bit x86. */
    if (EXTERNAL_SSE(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
    }
#endif

    if (EXTERNAL_SSE2(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
    }

    if (EXTERNAL_SSE4(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
    }
}
Loading…
Cancel
Save