mirror of https://github.com/FFmpeg/FFmpeg.git
For the callable function (as opposed to the inline one):

            C   SSE  SSE2  SSE4
    Win32: 47    42    29    26
    Win64: 30    33    25    23

The SSE version is neither compiled nor set for ARCH_X86_64, as the inlinable function takes over.

Signed-off-by: Janne Grunau <janne-libav@jannau.net>

pull/43/merge
parent
2bd44cb705
commit
5b59a9fc61
7 changed files with 196 additions and 0 deletions
@ -0,0 +1,52 @@ |
||||
/*
|
||||
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#if ARCH_X86_64 |
||||
# include "libavutil/x86/asm.h" |
||||
# include "libavutil/mem.h" |
||||
|
||||
# define int8x8_fmul_int32 int8x8_fmul_int32 |
||||
/**
 * Convert eight signed 8-bit values to float and scale them:
 *     dst[i] = src[i] * (scale / 16.0f)   for i = 0..7
 *
 * @param dsp   unused; present so the inline version matches the signature
 *              of the function-pointer based variant
 * @param dst   output, 8 floats; written with aligned stores (movaps), so it
 *              must be 16-byte aligned
 * @param src   input, 8 signed bytes (read with a single 64-bit load)
 * @param scale integer scale factor, converted to float before use
 */
static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
                                     float *dst, const int8_t *src, int scale)
{
    /* 0x3D800000 is the IEEE-754 single-precision bit pattern of 1/16. */
    DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000;

    __asm__ volatile (
        /* xmm0 = (float)scale * (1/16), broadcast to all lanes further down */
        "cvtsi2ss        %2, %%xmm0 \n\t"
        "mulss           %3, %%xmm0 \n\t"
        /* Sign-extend 8 bytes to 8 dwords: replicate each byte into all
         * four bytes of a dword, then arithmetic-shift right by 24. */
        "movq          (%1), %%xmm1 \n\t"
        "punpcklbw   %%xmm1, %%xmm1 \n\t"
        "movaps      %%xmm1, %%xmm2 \n\t"
        "punpcklwd   %%xmm1, %%xmm1 \n\t"
        "punpckhwd   %%xmm2, %%xmm2 \n\t"
        "psrad          $24, %%xmm1 \n\t"
        "psrad          $24, %%xmm2 \n\t"
        "shufps  $0, %%xmm0, %%xmm0 \n\t"
        /* int32 -> float, scale, store */
        "cvtdq2ps    %%xmm1, %%xmm1 \n\t"
        "cvtdq2ps    %%xmm2, %%xmm2 \n\t"
        "mulps       %%xmm0, %%xmm1 \n\t"
        "mulps       %%xmm0, %%xmm2 \n\t"
        "movaps      %%xmm1,  0(%0) \n\t"
        "movaps      %%xmm2, 16(%0) \n\t"
        :
        : "r"(dst), "r"(src), "m"(scale), "m"(inverse16)
        /* dst and src are only passed as addresses, so the compiler cannot
         * see that the asm reads *src and writes *dst; the "memory" clobber
         * stops it from caching or reordering those accesses around the
         * statement once this function is inlined. */
        : XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "memory"
    );
}
||||
|
||||
#endif /* ARCH_X86_64 */ |
@ -0,0 +1,90 @@ |
||||
;****************************************************************************** |
||||
;* SSE-optimized functions for the DCA decoder |
||||
;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com> |
||||
;* |
||||
;* This file is part of Libav. |
||||
;* |
||||
;* Libav is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* Libav is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with Libav; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA
pf_inv16:  times 4 dd 0x3D800000 ; 1/16

SECTION_TEXT

; void int8x8_fmul_int32_<opt>(float *dst, const int8_t *src, int scale)
;
; dst[i] = src[i] * (scale / 16.0f) for i = 0..7
;
; The macro is expanded once per instruction set below (sse, sse2, sse4).
; dst is written with mova, so it has to be 16-byte aligned.
%macro INT8X8_FMUL_INT32 0
cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
    ; m0 = (float)scale / 16, broadcast to all four lanes
    cvtsi2ss    m0, scalem
    mulss       m0, [pf_inv16]
    shufps      m0, m0, 0
%if cpuflag(sse2)
%if cpuflag(sse4)
    ; SSE4: direct sign extension of 4 bytes to 4 dwords
    pmovsxbd    m1, [srcq+0]
    pmovsxbd    m2, [srcq+4]
%else
    ; SSE2: replicate every byte into a full dword, then shift
    ; arithmetically by 24 to obtain the sign-extended value
    movq        m1, [srcq]
    punpcklbw   m1, m1
    mova        m2, m1
    punpcklwd   m1, m1
    punpckhwd   m2, m2
    psrad       m1, 24
    psrad       m2, 24
%endif
    cvtdq2ps    m1, m1
    cvtdq2ps    m2, m2
%else
    ; SSE only: no integer ops on xmm registers, so do the sign extension
    ; in MMX registers (two dwords each) and convert pairs with cvtpi2ps
    movd       mm0, [srcq+0]
    movd       mm1, [srcq+4]
    punpcklbw  mm0, mm0
    punpcklbw  mm1, mm1
    movq       mm2, mm0
    movq       mm3, mm1
    punpcklwd  mm0, mm0
    punpcklwd  mm1, mm1
    punpckhwd  mm2, mm2
    punpckhwd  mm3, mm3
    psrad      mm0, 24
    psrad      mm1, 24
    psrad      mm2, 24
    psrad      mm3, 24
    cvtpi2ps    m1, mm0         ; src[0..1]
    cvtpi2ps    m2, mm1         ; src[4..5]
    cvtpi2ps    m3, mm2         ; src[2..3]
    cvtpi2ps    m4, mm3         ; src[6..7]
    ; m0 already holds the broadcast scale from the top of the function and
    ; has not been modified since, so it is not shuffled a second time here
    emms                        ; leave MMX state before returning
    shufps      m1, m3, q1010   ; m1 = src[0..3] as floats
    shufps      m2, m4, q1010   ; m2 = src[4..7] as floats
%endif
    mulps       m1, m0
    mulps       m2, m0
    mova [dstq+ 0], m1
    mova [dstq+16], m2
    REP_RET
%endmacro

; The plain SSE variant is only assembled for 32-bit x86.
%if ARCH_X86_32
INIT_XMM sse
INT8X8_FMUL_INT32
%endif

INIT_XMM sse2
INT8X8_FMUL_INT32

INIT_XMM sse4
INT8X8_FMUL_INT32
@ -0,0 +1,47 @@ |
||||
/*
|
||||
* Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/dcadsp.h" |
||||
|
||||
void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale); |
||||
void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale); |
||||
void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale); |
||||
|
||||
/**
 * Select the x86 assembly implementation of int8x8_fmul_int32 according to
 * the CPU flags reported by av_get_cpu_flags().
 *
 * The checks run from the least to the most capable instruction set and
 * each one overwrites the previous choice, so the last match wins.
 *
 * @param s DSP context whose int8x8_fmul_int32 pointer is updated
 */
av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
{
    const int flags = av_get_cpu_flags();

#if ARCH_X86_32
    /* The SSE-only variant is assigned on 32-bit x86 builds only. */
    if (EXTERNAL_SSE(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
    }
#endif

    if (EXTERNAL_SSE2(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
    }

    if (EXTERNAL_SSE4(flags)) {
        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
    }
}
Loading…
Reference in new issue