mirror of https://github.com/FFmpeg/FFmpeg.git
Move vector_fmul() from DSPContext to AVFloatDSPContext.pull/59/head
parent
98db4e2a4e
commit
d5a7229ba4
39 changed files with 602 additions and 188 deletions
@ -1 +1,8 @@ |
|||||||
OBJS += arm/cpu.o \
|
OBJS += arm/cpu.o \
|
||||||
|
arm/float_dsp_init_arm.o \
|
||||||
|
|
||||||
|
ARMVFP-OBJS += arm/float_dsp_init_vfp.o \
|
||||||
|
arm/float_dsp_vfp.o \
|
||||||
|
|
||||||
|
NEON-OBJS += arm/float_dsp_init_neon.o \
|
||||||
|
arm/float_dsp_neon.o \
|
||||||
|
@ -0,0 +1,29 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H |
||||||
|
#define AVUTIL_ARM_FLOAT_DSP_ARM_H |
||||||
|
|
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
|
||||||
|
void ff_dsputil_init_vfp (AVFloatDSPContext *fdsp); |
||||||
|
void ff_dsputil_init_neon(AVFloatDSPContext *fdsp); |
||||||
|
|
||||||
|
#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */ |
@ -0,0 +1,33 @@ |
|||||||
|
/*
|
||||||
|
* ARM optimized DSP utils |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
#include "float_dsp_arm.h" |
||||||
|
|
||||||
|
/**
 * Install the ARM-specific float DSP routines that the running CPU supports.
 *
 * @param fdsp float DSP context whose function pointers may be overridden
 */
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
{
    const int flags = av_get_cpu_flags();

    /* VFP is initialised first and NEON second, so whatever the NEON init
     * assigns replaces the VFP choice when both are available. */
    if (have_vfp(flags)) {
        ff_dsputil_init_vfp(fdsp);
    }
    if (have_neon(flags)) {
        ff_dsputil_init_neon(fdsp);
    }
}
@ -0,0 +1,32 @@ |
|||||||
|
/*
|
||||||
|
* ARM NEON optimised Float DSP functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
#include "float_dsp_arm.h" |
||||||
|
|
||||||
|
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
||||||
|
|
||||||
|
/**
 * Install the NEON float DSP implementations into a context.
 *
 * @param fdsp float DSP context whose vector_fmul pointer is replaced by the
 *             NEON routine
 */
void ff_dsputil_init_neon(AVFloatDSPContext *fdsp)
{
    /* Write through the fdsp parameter; there is no other context in scope. */
    fdsp->vector_fmul = ff_vector_fmul_neon;
}
@ -0,0 +1,34 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
#include "float_dsp_arm.h" |
||||||
|
|
||||||
|
void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, |
||||||
|
int len); |
||||||
|
|
||||||
|
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) |
||||||
|
{ |
||||||
|
int cpu_flags = av_get_cpu_flags(); |
||||||
|
|
||||||
|
if (!have_vfpv3(cpu_flags)) |
||||||
|
c->vector_fmul = ff_vector_fmul_vfp; |
||||||
|
} |
@ -0,0 +1,64 @@ |
|||||||
|
/* |
||||||
|
* ARM NEON optimised Float DSP functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "asm.S" |
||||||
|
|
||||||
|
preserve8 |
||||||
|
|
||||||
|
/*
 * void ff_vector_fmul_neon(float *dst, const float *src0,
 *                          const float *src1, int len)
 *
 * Element-wise product: dst[i] = src0[i] * src1[i].
 *
 * Arguments arrive in r0 = dst, r1 = src0, r2 = src1, r3 = len (argument
 * order of the C prototype, ARM procedure call standard).
 *
 * The routine always works in groups of 8 floats and unconditionally loads
 * the first 8, so len is expected to be a positive multiple of 8.  Every
 * load/store carries a :128 alignment qualifier, i.e. the three pointers are
 * expected to be 16-byte aligned.
 *
 * The code is software-pipelined: q8/q9 always hold 8 products that have
 * been computed but not yet stored.
 */
function ff_vector_fmul_neon, export=1 |
||||||
|
/* r3 = len - 8; the flags set here are consumed by the "beq 3f" below */
subs r3, r3, #8 |
||||||
|
/* prologue: load the first 8 floats of each input and multiply them */
vld1.32 {d0-d3}, [r1,:128]! |
||||||
|
vld1.32 {d4-d7}, [r2,:128]! |
||||||
|
vmul.f32 q8, q0, q2 |
||||||
|
vmul.f32 q9, q1, q3 |
||||||
|
/* len == 8: only the pending products remain to be stored */
beq 3f |
||||||
|
/* ip = remaining count rounded down to a multiple of 16; zero means only
 * one more group of 8 is left, handled at label 2 */
bics ip, r3, #15 |
||||||
|
beq 2f |
||||||
|
/* main loop: 16 floats per iteration.  The flags from this subs decide the
 * "bne 1b" at the bottom of the loop. */
1: subs ip, ip, #16 |
||||||
|
vld1.32 {d0-d1}, [r1,:128]! |
||||||
|
vld1.32 {d4-d5}, [r2,:128]! |
||||||
|
vmul.f32 q10, q0, q2 |
||||||
|
vld1.32 {d2-d3}, [r1,:128]! |
||||||
|
vld1.32 {d6-d7}, [r2,:128]! |
||||||
|
vmul.f32 q11, q1, q3 |
||||||
|
/* store the 8 products that were pending in q8/q9 */
vst1.32 {d16-d19},[r0,:128]! |
||||||
|
vld1.32 {d0-d1}, [r1,:128]! |
||||||
|
vld1.32 {d4-d5}, [r2,:128]! |
||||||
|
vmul.f32 q8, q0, q2 |
||||||
|
vld1.32 {d2-d3}, [r1,:128]! |
||||||
|
vld1.32 {d6-d7}, [r2,:128]! |
||||||
|
vmul.f32 q9, q1, q3 |
||||||
|
/* store q10/q11; q8/q9 now hold the next pending group */
vst1.32 {d20-d23},[r0,:128]! |
||||||
|
bne 1b |
||||||
|
/* anything left after the 16-element iterations? */
ands r3, r3, #15 |
||||||
|
beq 3f |
||||||
|
/* tail: one more group of 8; stores of the pending products are
 * interleaved with the loads and multiplies of the last group */
2: vld1.32 {d0-d1}, [r1,:128]! |
||||||
|
vld1.32 {d4-d5}, [r2,:128]! |
||||||
|
vst1.32 {d16-d17},[r0,:128]! |
||||||
|
vmul.f32 q8, q0, q2 |
||||||
|
vld1.32 {d2-d3}, [r1,:128]! |
||||||
|
vld1.32 {d6-d7}, [r2,:128]! |
||||||
|
vst1.32 {d18-d19},[r0,:128]! |
||||||
|
vmul.f32 q9, q1, q3 |
||||||
|
/* epilogue: flush the last 8 pending products and return */
3: vst1.32 {d16-d19},[r0,:128]! |
||||||
|
bx lr |
||||||
|
endfunc |
@ -0,0 +1,68 @@ |
|||||||
|
/* |
||||||
|
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
|
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "asm.S" |
||||||
|
|
||||||
|
/** |
||||||
|
* Assume that len is a positive number and is a multiple of 8 |
||||||
|
*/ |
||||||
|
@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
|
||||||
|
/*
 * Element-wise product dst[i] = src0[i] * src1[i] using VFP short vectors.
 *
 * Arguments arrive in r0 = dst, r1 = src0, r2 = src1, r3 = len (argument
 * order of the C prototype, ARM procedure call standard).
 *
 * The FPSCR vector length is set to 4 for the duration of the routine, so
 * each vmul.f32 below is meant to operate on four consecutive
 * single-precision registers.  Two register sets are used alternately
 * (s0-s15 and s16-s31) so that loads for the next group of 8 floats overlap
 * with the multiplies and stores of the current one.
 */
function ff_vector_fmul_vfp, export=1 |
||||||
|
/* d8-d15 alias s16-s31, which are used below and must be preserved */
vpush {d8-d15} |
||||||
|
/* r12 keeps the modified FPSCR value so it can be restored at the end */
fmrx r12, fpscr |
||||||
|
orr r12, r12, #(3 << 16) /* set vector size to 4 */ |
||||||
|
fmxr fpscr, r12 |
||||||
|
|
||||||
|
/* prologue: load the first 8 floats of each input, multiply the first 4 */
vldmia r1!, {s0-s3} |
||||||
|
vldmia r2!, {s8-s11} |
||||||
|
vldmia r1!, {s4-s7} |
||||||
|
vldmia r2!, {s12-s15} |
||||||
|
vmul.f32 s8, s0, s8 |
||||||
|
1: |
||||||
|
/* r3 -= 16; all conditional instructions up to "bgt 1b" test these flags:
 *   ge: a second group of 8 exists in this iteration
 *   gt: more data remains for another iteration */
subs r3, r3, #16 |
||||||
|
vmul.f32 s12, s4, s12 |
||||||
|
itttt ge |
||||||
|
vldmiage r1!, {s16-s19} |
||||||
|
vldmiage r2!, {s24-s27} |
||||||
|
vldmiage r1!, {s20-s23} |
||||||
|
vldmiage r2!, {s28-s31} |
||||||
|
it ge |
||||||
|
vmulge.f32 s24, s16, s24 |
||||||
|
/* store the 8 products of the first group (s8-s15) */
vstmia r0!, {s8-s11} |
||||||
|
vstmia r0!, {s12-s15} |
||||||
|
it ge |
||||||
|
vmulge.f32 s28, s20, s28 |
||||||
|
/* prefetch the first group of the next iteration, if there is one */
itttt gt |
||||||
|
vldmiagt r1!, {s0-s3} |
||||||
|
vldmiagt r2!, {s8-s11} |
||||||
|
vldmiagt r1!, {s4-s7} |
||||||
|
vldmiagt r2!, {s12-s15} |
||||||
|
ittt ge |
||||||
|
vmulge.f32 s8, s0, s8 |
||||||
|
/* store the 8 products of the second group (s24-s31) */
vstmiage r0!, {s24-s27} |
||||||
|
vstmiage r0!, {s28-s31} |
||||||
|
bgt 1b |
||||||
|
|
||||||
|
/* epilogue: clear the vector-length bits again, restore registers, return */
bic r12, r12, #(7 << 16) /* set vector size back to 1 */ |
||||||
|
fmxr fpscr, r12 |
||||||
|
vpop {d8-d15} |
||||||
|
bx lr |
||||||
|
endfunc |
@ -0,0 +1,42 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
|
||||||
|
#include "float_dsp.h" |
||||||
|
|
||||||
|
/**
 * Portable C implementation of the element-wise float product.
 *
 * Writes src0[k] * src1[k] to dst[k] for every k in [0, len), in ascending
 * order. Nothing is written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements to process
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                          int len)
{
    const float *lhs = src0;
    const float *rhs = src1;
    float *out       = dst;
    int remaining;

    for (remaining = len; remaining > 0; remaining--)
        *out++ = *lhs++ * *rhs++;
}
||||||
|
|
||||||
|
/**
 * Fill a float DSP context with the best available implementations.
 *
 * The portable C routine is installed first; at most one architecture
 * specific initialiser (selected at compile time through ARCH_*) is then
 * given the chance to override it.
 *
 * @param fdsp      float DSP context to initialise
 * @param bit_exact forwarded to the PPC initialiser only; the other branches
 *                  visible here do not use it
 */
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) |
||||||
|
{ |
||||||
|
/* C fallback, always valid */
fdsp->vector_fmul = vector_fmul_c; |
||||||
|
|
||||||
|
/* compile-time dispatch: exactly one (or none) of these calls is built */
#if ARCH_ARM |
||||||
|
ff_float_dsp_init_arm(fdsp); |
||||||
|
#elif ARCH_PPC |
||||||
|
ff_float_dsp_init_ppc(fdsp, bit_exact); |
||||||
|
#elif ARCH_X86 |
||||||
|
ff_float_dsp_init_x86(fdsp); |
||||||
|
#endif |
||||||
|
} |
@ -0,0 +1,53 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVUTIL_FLOAT_DSP_H |
||||||
|
#define AVUTIL_FLOAT_DSP_H |
||||||
|
|
||||||
|
/**
 * Table of float DSP routines.
 *
 * avpriv_float_dsp_init() fills in every member, starting from a C
 * implementation and letting an architecture-specific initialiser replace
 * it. Callers invoke the routines through these pointers.
 */
typedef struct AVFloatDSPContext { |
||||||
|
/**
|
||||||
|
* Calculate the product of two vectors of floats and store the result in |
||||||
|
* a vector of floats. |
||||||
|
* |
||||||
|
* @param dst output vector |
||||||
|
* constraints: 32-byte aligned |
||||||
|
* @param src0 first input vector |
||||||
|
* constraints: 32-byte aligned |
||||||
|
* @param src1 second input vector |
||||||
|
* constraints: 32-byte aligned |
||||||
|
* @param len number of elements in the input |
||||||
|
* constraints: multiple of 16 |
||||||
|
*/ |
||||||
|
void (*vector_fmul)(float *dst, const float *src0, const float *src1, |
||||||
|
int len); |
||||||
|
} AVFloatDSPContext; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a float DSP context. |
||||||
|
* |
||||||
|
* @param fdsp float DSP context |
||||||
|
* @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant |
||||||
|
*/ |
||||||
|
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict); |
||||||
|
|
||||||
|
|
||||||
|
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); |
||||||
|
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); |
||||||
|
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); |
||||||
|
|
||||||
|
#endif /* AVUTIL_FLOAT_DSP_H */ |
@ -1 +1,4 @@ |
|||||||
OBJS += ppc/cpu.o \
|
OBJS += ppc/cpu.o \
|
||||||
|
ppc/float_dsp_init.o \
|
||||||
|
|
||||||
|
ALTIVEC-OBJS += ppc/float_dsp_altivec.o \
|
||||||
|
@ -0,0 +1,37 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "util_altivec.h" |
||||||
|
|
||||||
|
void ff_vector_fmul_altivec(float *dst, const float *src0, const float *src1, |
||||||
|
int len) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); |
||||||
|
for (i = 0; i < len - 7; i += 8) { |
||||||
|
d0 = vec_ld( 0, src0 + i); |
||||||
|
s = vec_ld( 0, src1 + i); |
||||||
|
d1 = vec_ld(16, src0 + i); |
||||||
|
d0 = vec_madd(d0, s, zero); |
||||||
|
d1 = vec_madd(d1, vec_ld(16, src1 + i), zero); |
||||||
|
vec_st(d0, 0, dst + i); |
||||||
|
vec_st(d1, 16, dst + i); |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,37 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
|
||||||
|
extern void ff_vector_fmul_altivec(float *dst, const float *src0, |
||||||
|
const float *src1, int len); |
||||||
|
|
||||||
|
/**
 * Install the PowerPC float DSP routines supported by the running CPU.
 *
 * Does nothing unless the build has AltiVec support and the CPU reports it.
 *
 * @param fdsp      float DSP context whose function pointers may be overridden
 * @param bit_exact not consulted by this function
 */
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
{
#if HAVE_ALTIVEC
    const int cpu_flags = av_get_cpu_flags();

    if (cpu_flags & AV_CPU_FLAG_ALTIVEC) {
        fdsp->vector_fmul = ff_vector_fmul_altivec;
    }
#endif
}
@ -1 +1,4 @@ |
|||||||
OBJS += x86/cpu.o \
|
OBJS += x86/cpu.o \
|
||||||
|
x86/float_dsp_init.o \
|
||||||
|
|
||||||
|
YASM-OBJS += x86/float_dsp.o \
|
||||||
|
@ -0,0 +1,55 @@ |
|||||||
|
;***************************************************************************** |
||||||
|
;* x86-optimized Float DSP functions |
||||||
|
;* |
||||||
|
;* This file is part of Libav. |
||||||
|
;* |
||||||
|
;* Libav is free software; you can redistribute it and/or |
||||||
|
;* modify it under the terms of the GNU Lesser General Public |
||||||
|
;* License as published by the Free Software Foundation; either |
||||||
|
;* version 2.1 of the License, or (at your option) any later version. |
||||||
|
;* |
||||||
|
;* Libav is distributed in the hope that it will be useful, |
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
;* Lesser General Public License for more details. |
||||||
|
;* |
||||||
|
;* You should have received a copy of the GNU Lesser General Public |
||||||
|
;* License along with Libav; if not, write to the Free Software |
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
;****************************************************************************** |
||||||
|
|
||||||
|
%include "x86inc.asm" |
||||||
|
|
||||||
|
SECTION .text |
||||||
|
|
||||||
|
;----------------------------------------------------------------------------- |
||||||
|
; void vector_fmul(float *dst, const float *src0, const float *src1, int len) |
||||||
|
;----------------------------------------------------------------------------- |
||||||
|
; VECTOR_FMUL: emits one vector_fmul implementation for the currently
; selected register width (mmsize).
;
; The arrays are walked from the end towards the start: lenq is turned into
; a byte offset pointing at the last 2*mmsize-byte chunk and decreased by
; 2*mmsize per iteration until it becomes negative.  Each iteration handles
; two full vectors per array, so len is expected to be a positive multiple
; of 2*mmsize/4 floats.
; NOTE(review): mova is presumably x86inc's aligned-move alias, which would
; require dst/src0/src1 to be mmsize-aligned -- confirm against x86inc.asm.
%macro VECTOR_FMUL 0 |
||||||
|
cglobal vector_fmul, 4,4,2, dst, src0, src1, len |
||||||
|
; lenq = len*4 - 2*mmsize: byte offset of the last chunk
lea lenq, [lend*4 - 2*mmsize] |
||||||
|
ALIGN 16 |
||||||
|
.loop |
||||||
|
; load two vectors from src0, multiply by the matching src1 vectors,
; store both products to dst
mova m0, [src0q + lenq] |
||||||
|
mova m1, [src0q + lenq + mmsize] |
||||||
|
mulps m0, m0, [src1q + lenq] |
||||||
|
mulps m1, m1, [src1q + lenq + mmsize] |
||||||
|
mova [dstq + lenq], m0 |
||||||
|
mova [dstq + lenq + mmsize], m1 |
||||||
|
|
||||||
|
; step back one chunk; keep looping while the offset is still >= 0
sub lenq, 2*mmsize |
||||||
|
jge .loop |
||||||
|
; the 32-byte (YMM) variant issues vzeroupper before returning
%if mmsize == 32 |
||||||
|
vzeroupper |
||||||
|
RET |
||||||
|
%else |
||||||
|
REP_RET |
||||||
|
%endif |
||||||
|
%endmacro |
||||||
|
|
||||||
|
; instantiate the SSE (XMM) version, and the AVX (YMM) version when the
; build enables AVX
INIT_XMM sse |
||||||
|
VECTOR_FMUL |
||||||
|
%if HAVE_AVX |
||||||
|
INIT_YMM avx |
||||||
|
VECTOR_FMUL |
||||||
|
%endif |
@ -0,0 +1,41 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
|
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/float_dsp.h" |
||||||
|
|
||||||
|
extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, |
||||||
|
int len); |
||||||
|
extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, |
||||||
|
int len); |
||||||
|
|
||||||
|
/**
 * Install the x86 float DSP routines supported by the running CPU.
 *
 * Does nothing when the build has no YASM-assembled routines.
 *
 * @param fdsp float DSP context whose function pointers may be overridden
 */
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

    /* Tested in increasing order of preference: a later match overrides an
     * earlier one, so AVX wins over SSE when both are usable. */
    if (HAVE_SSE && (cpu_flags & AV_CPU_FLAG_SSE))
        fdsp->vector_fmul = ff_vector_fmul_sse;

    if (HAVE_AVX && (cpu_flags & AV_CPU_FLAG_AVX))
        fdsp->vector_fmul = ff_vector_fmul_avx;
#endif
}
Loading…
Reference in new issue