x86: float_dsp: add SSE version of vector_fmul_scalar()

pull/8/head
Justin Ruggles 13 years ago
parent 284ea790d8
commit 947f933687
  1. 29
      libavutil/x86/float_dsp.asm
  2. 4
      libavutil/x86/float_dsp_init.c

@ -85,3 +85,32 @@ INIT_XMM sse
VECTOR_FMAC_SCALAR
INIT_YMM avx
VECTOR_FMAC_SCALAR
;------------------------------------------------------------------------------
; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
;------------------------------------------------------------------------------
%macro VECTOR_FMUL_SCALAR 0
%if UNIX64
cglobal vector_fmul_scalar, 3,3,2, dst, src, len
%else
cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
%endif
%if ARCH_X86_32
movss m0, mulm
%elif WIN64
SWAP 0, 2
%endif
shufps m0, m0, 0
lea lenq, [lend*4-mmsize]
.loop:
mova m1, [srcq+lenq]
mulps m1, m0
mova [dstq+lenq], m1
sub lenq, mmsize
jge .loop
REP_RET
%endmacro
INIT_XMM sse
VECTOR_FMUL_SCALAR

@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
int len);
extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
int len);
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
int mm_flags = av_get_cpu_flags();
@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
if (EXTERNAL_SSE(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_sse;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
}
if (EXTERNAL_AVX(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_avx;

Loading…
Cancel
Save