mirror of https://github.com/FFmpeg/FFmpeg.git
* qatar/master:
float_dsp: ppc: add a separate header for Altivec function prototypes
ARM: fix float_dsp breakage from d5a7229
Add a float DSP framework to libavutil
PPC: Move types_altivec.h and util_altivec.h from libavcodec to libavutil
ARM: Move asm.S from libavcodec to libavutil
vc1dsp: mark put/avg_vc1_mspel_mc() always_inline
Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/59/head
commit
7e22514d98
92 changed files with 697 additions and 256 deletions
@ -1 +1,8 @@ |
||||
OBJS += arm/cpu.o \
|
||||
arm/float_dsp_init_arm.o \
|
||||
|
||||
ARMVFP-OBJS += arm/float_dsp_init_vfp.o \
|
||||
arm/float_dsp_vfp.o \
|
||||
|
||||
NEON-OBJS += arm/float_dsp_init_neon.o \
|
||||
arm/float_dsp_neon.o \
|
||||
|
@ -0,0 +1,29 @@ |
||||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H |
||||
#define AVUTIL_ARM_FLOAT_DSP_ARM_H |
||||
|
||||
#include "libavutil/float_dsp.h" |
||||
|
||||
void ff_float_dsp_init_vfp (AVFloatDSPContext *fdsp); |
||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp); |
||||
|
||||
#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */ |
@ -0,0 +1,33 @@ |
||||
/*
|
||||
* ARM optimized DSP utils |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavutil/float_dsp.h" |
||||
#include "float_dsp_arm.h" |
||||
|
||||
/**
 * Install the ARM-specific float DSP routines supported by the running CPU.
 *
 * The VFP initializer runs before the NEON one, so on a CPU reporting both
 * feature sets any pointer written by both ends up with the NEON version.
 *
 * @param fdsp float DSP context whose function pointers are updated
 */
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
{
    const int flags = av_get_cpu_flags();

    if (have_vfp(flags)) {
        ff_float_dsp_init_vfp(fdsp);
    }

    if (have_neon(flags)) {
        ff_float_dsp_init_neon(fdsp);
    }
}
@ -0,0 +1,32 @@ |
||||
/*
|
||||
* ARM NEON optimised Float DSP functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/float_dsp.h" |
||||
#include "float_dsp_arm.h" |
||||
|
||||
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
||||
|
||||
/**
 * Point the float DSP context at the NEON implementations.
 *
 * @param fdsp float DSP context; its vector_fmul entry is overwritten
 */
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
    fdsp->vector_fmul = &ff_vector_fmul_neon;
}
@ -0,0 +1,34 @@ |
||||
/*
|
||||
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavutil/float_dsp.h" |
||||
#include "float_dsp_arm.h" |
||||
|
||||
void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, |
||||
int len); |
||||
|
||||
/**
 * Point the float DSP context at the VFP implementations.
 *
 * The VFP routine is only installed when the CPU does NOT report VFPv3.
 * NOTE(review): presumably because ff_vector_fmul_vfp depends on the VFP
 * vector-length mode it programs through FPSCR -- confirm the rationale.
 *
 * @param fdsp float DSP context; its vector_fmul entry may be overwritten
 */
void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp)
{
    const int flags = av_get_cpu_flags();

    if (have_vfpv3(flags))
        return;

    fdsp->vector_fmul = ff_vector_fmul_vfp;
}
@ -0,0 +1,64 @@ |
||||
/* |
||||
* ARM NEON optimised Float DSP functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "asm.S" |
||||
|
||||
preserve8 |
||||
|
||||
/*
 * ff_vector_fmul_neon: element-wise product of two float vectors.
 *
 * Matches the C prototype
 *   void ff_vector_fmul_neon(float *dst, const float *src0,
 *                            const float *src1, int len);
 *
 * Register use as seen in the code below: r0 is the store pointer (dst),
 * r1 and r2 are the load pointers (src0, src1), r3 is the element count
 * and ip holds the number of elements handled by the unrolled loop.
 * Every load/store carries the :128 alignment qualifier, so all three
 * pointers are expected to be 16-byte aligned.
 *
 * The code always consumes 8 elements up front and then works in chunks
 * of 16 (loop at 1:) or 8 (tail at 2:), so it appears to require len to
 * be a non-zero multiple of 8 -- NOTE(review): confirm against callers.
 */
function ff_vector_fmul_neon, export=1 |
||||
/* Prologue: r3 = len - 8; multiply the first 8 elements into q8/q9. */
subs r3, r3, #8 |
||||
vld1.32 {d0-d3}, [r1,:128]! |
||||
vld1.32 {d4-d7}, [r2,:128]! |
||||
vmul.f32 q8, q0, q2 |
||||
vmul.f32 q9, q1, q3 |
||||
/* Flags still come from the subs above: len == 8, only the store is left. */
beq 3f |
||||
/* ip = remaining count rounded down to a multiple of 16; none -> tail. */
bics ip, r3, #15 |
||||
beq 2f |
||||
/*
 * Main loop, 16 elements per pass. Products computed in one step are
 * stored during the next one, so q8/q9 always hold 8 results that have
 * not been written yet when the loop exits.
 */
1: subs ip, ip, #16 |
||||
vld1.32 {d0-d1}, [r1,:128]! |
||||
vld1.32 {d4-d5}, [r2,:128]! |
||||
vmul.f32 q10, q0, q2 |
||||
vld1.32 {d2-d3}, [r1,:128]! |
||||
vld1.32 {d6-d7}, [r2,:128]! |
||||
vmul.f32 q11, q1, q3 |
||||
vst1.32 {d16-d19},[r0,:128]! |
||||
vld1.32 {d0-d1}, [r1,:128]! |
||||
vld1.32 {d4-d5}, [r2,:128]! |
||||
vmul.f32 q8, q0, q2 |
||||
vld1.32 {d2-d3}, [r1,:128]! |
||||
vld1.32 {d6-d7}, [r2,:128]! |
||||
vmul.f32 q9, q1, q3 |
||||
vst1.32 {d20-d23},[r0,:128]! |
||||
bne 1b |
||||
/* Anything left after the 16-element passes goes through the tail. */
ands r3, r3, #15 |
||||
beq 3f |
||||
/* Tail: multiply 8 more elements while storing the pending q8/q9. */
2: vld1.32 {d0-d1}, [r1,:128]! |
||||
vld1.32 {d4-d5}, [r2,:128]! |
||||
vst1.32 {d16-d17},[r0,:128]! |
||||
vmul.f32 q8, q0, q2 |
||||
vld1.32 {d2-d3}, [r1,:128]! |
||||
vld1.32 {d6-d7}, [r2,:128]! |
||||
vst1.32 {d18-d19},[r0,:128]! |
||||
vmul.f32 q9, q1, q3 |
||||
/* Epilogue: write the last 8 results still held in q8/q9. */
3: vst1.32 {d16-d19},[r0,:128]! |
||||
bx lr |
||||
endfunc |
@ -0,0 +1,68 @@ |
||||
/* |
||||
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
|
||||
* |
||||
* This file is part of FFmpeg |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "asm.S" |
||||
|
||||
/** |
||||
* Assumes that len is a positive multiple of 8. |
||||
*/ |
||||
@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
|
||||
/*
 * ff_vector_fmul_vfp: element-wise product of two float vectors using VFP.
 *
 * Register use as seen in the code below: r0 is the store pointer (dst),
 * r1 and r2 are the load pointers (src0, src1), r3 counts the remaining
 * elements and r12 holds the FPSCR value being modified.
 * d8-d15 (which alias s16-s31) are pushed on entry and popped on exit
 * because the loop uses s16-s31 as working registers.
 */
function ff_vector_fmul_vfp, export=1 |
||||
vpush {d8-d15} |
||||
fmrx r12, fpscr |
||||
orr r12, r12, #(3 << 16) /* set vector size to 4 */ |
||||
fmxr fpscr, r12 |
||||
|
||||
/*
 * Prime the pipeline: load the first 8 elements of each input and start
 * multiplying. With the vector size set to 4 above, each vmul is expected
 * to produce 4 consecutive results (the code stores s8-s11 after a single
 * "vmul s8, s0, s8").
 */
vldmia r1!, {s0-s3} |
||||
vldmia r2!, {s8-s11} |
||||
vldmia r1!, {s4-s7} |
||||
vldmia r2!, {s12-s15} |
||||
vmul.f32 s8, s0, s8 |
||||
/*
 * Each pass stores the 8 products held in s8-s15. The "ge" instructions
 * additionally load, multiply and store the next 8 elements through
 * s16-s31; the "gt" loads fetch the 8 after those for the following pass.
 * The condition flags all come from the subs at the top of the pass.
 */
1: |
||||
subs r3, r3, #16 |
||||
vmul.f32 s12, s4, s12 |
||||
itttt ge |
||||
vldmiage r1!, {s16-s19} |
||||
vldmiage r2!, {s24-s27} |
||||
vldmiage r1!, {s20-s23} |
||||
vldmiage r2!, {s28-s31} |
||||
it ge |
||||
vmulge.f32 s24, s16, s24 |
||||
vstmia r0!, {s8-s11} |
||||
vstmia r0!, {s12-s15} |
||||
it ge |
||||
vmulge.f32 s28, s20, s28 |
||||
itttt gt |
||||
vldmiagt r1!, {s0-s3} |
||||
vldmiagt r2!, {s8-s11} |
||||
vldmiagt r1!, {s4-s7} |
||||
vldmiagt r2!, {s12-s15} |
||||
ittt ge |
||||
vmulge.f32 s8, s0, s8 |
||||
vstmiage r0!, {s24-s27} |
||||
vstmiage r0!, {s28-s31} |
||||
bgt 1b |
||||
|
||||
/* Undo the FPSCR change made on entry before returning to the caller. */
bic r12, r12, #(7 << 16) /* set vector size back to 1 */ |
||||
fmxr fpscr, r12 |
||||
vpop {d8-d15} |
||||
bx lr |
||||
endfunc |
@ -0,0 +1,42 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "float_dsp.h" |
||||
|
||||
/**
 * Portable reference implementation of vector_fmul.
 *
 * Writes src0[i] * src1[i] to dst[i] for every i in [0, len), walking the
 * arrays from the first element to the last. Nothing is written when len
 * is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements to process
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                          int len)
{
    const float *lhs = src0;
    const float *rhs = src1;
    float *out = dst;
    int remaining;

    for (remaining = len; remaining > 0; remaining--)
        *out++ = *lhs++ * *rhs++;
}
||||
|
||||
/**
 * Fill a float DSP context with function pointers.
 *
 * The portable C routine is installed first; the initializer of the
 * architecture selected at build time (at most one of ARM, PPC, x86) may
 * then replace it.
 *
 * @param fdsp      float DSP context to initialize
 * @param bit_exact forwarded to the PPC initializer only; the other
 *                  architecture initializers do not take it
 */
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
    /* Generic fallback, valid on every target. */
    fdsp->vector_fmul = &vector_fmul_c;

#if ARCH_ARM
    ff_float_dsp_init_arm(fdsp);
#elif ARCH_PPC
    ff_float_dsp_init_ppc(fdsp, bit_exact);
#elif ARCH_X86
    ff_float_dsp_init_x86(fdsp);
#endif
}
@ -0,0 +1,53 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVUTIL_FLOAT_DSP_H |
||||
#define AVUTIL_FLOAT_DSP_H |
||||
|
||||
/**
 * Table of float DSP routines. Entries are filled in by
 * avpriv_float_dsp_init() and may point at architecture-specific code.
 */
typedef struct AVFloatDSPContext {
    /**
     * Multiply two float vectors element by element and write the
     * products to a third vector.
     *
     * @param dst  destination vector;
     *             constraints: 32-byte aligned
     * @param src0 first source vector;
     *             constraints: 32-byte aligned
     * @param src1 second source vector;
     *             constraints: 32-byte aligned
     * @param len  element count of the inputs;
     *             constraints: multiple of 16
     */
    void (*vector_fmul)(float *dst, const float *src0, const float *src1,
                        int len);
} AVFloatDSPContext;
||||
|
||||
/**
|
||||
* Initialize a float DSP context. |
||||
* |
||||
* @param fdsp float DSP context |
||||
* @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant |
||||
*/ |
||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict); |
||||
|
||||
|
||||
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); |
||||
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); |
||||
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); |
||||
|
||||
#endif /* AVUTIL_FLOAT_DSP_H */ |
@ -1 +1,4 @@ |
||||
OBJS += ppc/cpu.o \
|
||||
ppc/float_dsp_init.o \
|
||||
|
||||
ALTIVEC-OBJS += ppc/float_dsp_altivec.o \
|
||||
|
@ -0,0 +1,38 @@ |
||||
/*
|
||||
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "util_altivec.h" |
||||
#include "float_dsp_altivec.h" |
||||
|
||||
void ff_vector_fmul_altivec(float *dst, const float *src0, const float *src1, |
||||
int len) |
||||
{ |
||||
int i; |
||||
vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); |
||||
for (i = 0; i < len - 7; i += 8) { |
||||
d0 = vec_ld( 0, src0 + i); |
||||
s = vec_ld( 0, src1 + i); |
||||
d1 = vec_ld(16, src0 + i); |
||||
d0 = vec_madd(d0, s, zero); |
||||
d1 = vec_madd(d1, vec_ld(16, src1 + i), zero); |
||||
vec_st(d0, 0, dst + i); |
||||
vec_st(d1, 16, dst + i); |
||||
} |
||||
} |
@ -0,0 +1,27 @@ |
||||
/*
|
||||
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H |
||||
#define AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H |
||||
|
||||
extern void ff_vector_fmul_altivec(float *dst, const float *src0, |
||||
const float *src1, int len); |
||||
|
||||
#endif /* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */ |
@ -0,0 +1,36 @@ |
||||
/*
|
||||
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/float_dsp.h" |
||||
#include "float_dsp_altivec.h" |
||||
|
||||
/**
 * Install the PowerPC-specific float DSP routines.
 *
 * Does nothing unless the build has AltiVec support (HAVE_ALTIVEC) and the
 * running CPU reports AV_CPU_FLAG_ALTIVEC.
 *
 * @param fdsp      float DSP context whose function pointers are updated
 * @param bit_exact not consulted by this function
 */
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
{
#if HAVE_ALTIVEC
    const int cpu_caps = av_get_cpu_flags();

    if (cpu_caps & AV_CPU_FLAG_ALTIVEC)
        fdsp->vector_fmul = ff_vector_fmul_altivec;
#endif
}
@ -1 +1,4 @@ |
||||
OBJS += x86/cpu.o \
|
||||
x86/float_dsp_init.o \
|
||||
|
||||
YASM-OBJS += x86/float_dsp.o \
|
||||
|
@ -0,0 +1,55 @@ |
||||
;***************************************************************************** |
||||
;* x86-optimized Float DSP functions |
||||
;* |
||||
;* This file is part of Libav. |
||||
;* |
||||
;* Libav is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* Libav is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with Libav; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "x86inc.asm" |
||||
|
||||
SECTION .text |
||||
|
||||
;----------------------------------------------------------------------------- |
||||
; void vector_fmul(float *dst, const float *src0, const float *src1, int len) |
||||
;----------------------------------------------------------------------------- |
||||
; VECTOR_FMUL emits one vector_fmul implementation for the instruction set
; selected by the INIT_XMM / INIT_YMM that precedes its expansion (see the
; instantiations after the macro).
;
; lenq is first turned into a byte offset that addresses the last 2*mmsize
; bytes of the arrays. The loop then walks backwards from there, multiplying
; two vector registers' worth of floats per iteration, until the offset goes
; negative.
; NOTE(review): there is no tail handling, so len*4 presumably has to be a
; multiple of 2*mmsize, and mova presumably requires mmsize-aligned
; pointers -- confirm against the documented constraints of vector_fmul.
%macro VECTOR_FMUL 0 |
||||
cglobal vector_fmul, 4,4,2, dst, src0, src1, len |
||||
lea lenq, [lend*4 - 2*mmsize] |
||||
ALIGN 16 |
||||
.loop |
||||
mova m0, [src0q + lenq] |
||||
mova m1, [src0q + lenq + mmsize] |
||||
mulps m0, m0, [src1q + lenq] |
||||
mulps m1, m1, [src1q + lenq + mmsize] |
||||
mova [dstq + lenq], m0 |
||||
mova [dstq + lenq + mmsize], m1 |
||||
|
||||
sub lenq, 2*mmsize |
||||
jge .loop |
||||
; The 32-byte (ymm) variant issues vzeroupper before returning.
%if mmsize == 32 |
||||
vzeroupper |
||||
RET |
||||
%else |
||||
REP_RET |
||||
%endif |
||||
%endmacro |
||||
|
||||
; Instantiate the SSE version always and the AVX version when HAVE_AVX is set.
INIT_XMM sse |
||||
VECTOR_FMUL |
||||
%if HAVE_AVX |
||||
INIT_YMM avx |
||||
VECTOR_FMUL |
||||
%endif |
@ -0,0 +1,41 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/float_dsp.h" |
||||
|
||||
extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, |
||||
int len); |
||||
extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, |
||||
int len); |
||||
|
||||
/**
 * Install the x86-specific float DSP routines.
 *
 * Only active when the assembly was built (HAVE_YASM). The SSE version is
 * considered first and the AVX version second, so AVX wins on CPUs that
 * report both feature flags.
 *
 * @param fdsp float DSP context whose function pointers are updated
 */
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
#if HAVE_YASM
    const int cpu_caps = av_get_cpu_flags();

    if (HAVE_SSE && (cpu_caps & AV_CPU_FLAG_SSE))
        fdsp->vector_fmul = ff_vector_fmul_sse;

    if (HAVE_AVX && (cpu_caps & AV_CPU_FLAG_AVX))
        fdsp->vector_fmul = ff_vector_fmul_avx;
#endif
}
Loading…
Reference in new issue