mirror of https://github.com/FFmpeg/FFmpeg.git
* commit 'fef906c77c09940a2fdad155b2adc05080e17eda': Move vorbis_inverse_coupling from dsputil to VorbisDSPContext. Conflicts: libavcodec/dsputil.c, libavcodec/x86/dsputil_mmx.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (pull/8/head)
commit
c62cb1112f
17 changed files with 358 additions and 167 deletions
@ -0,0 +1,36 @@ |
|||||||
|
/*
|
||||||
|
* ARM NEON optimised DSP functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavcodec/vorbisdsp.h" |
||||||
|
|
||||||
|
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); |
||||||
|
|
||||||
|
void ff_vorbisdsp_init_arm(VorbisDSPContext *c) |
||||||
|
{ |
||||||
|
int cpu_flags = av_get_cpu_flags(); |
||||||
|
|
||||||
|
if (have_neon(cpu_flags)) { |
||||||
|
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,83 @@ |
|||||||
|
/* |
||||||
|
* ARM NEON optimised DSP functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S" |
||||||
|
|
||||||
|
@ void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize)
@
@ Register use (arguments presumably arrive as r0 = mag, r1 = ang,
@ r2 = blocksize under the standard ARM calling convention -- confirm):
@   r0, r1   read pointers for mag / ang
@   r3, r12  write pointers for mag / ang (copies of r0 / r1)
@   r2       remaining element count, biased by -4
@   q10      0x80000000 in every lane (sign-bit mask)
@
@ Per vector of four floats:
@   mask = (ang <= 0), taken as a signed integer compare on the bit pattern
@   t    = ang ^ (mag & signbit)
@   mag' = mag + (t &  mask)
@   ang' = mag - (t & ~mask)
@
@ The main loop handles two vectors per pass and delays each store until
@ the following vector has been loaded.
function ff_vorbis_inverse_coupling_neon, export=1
        vmov.i32        q10, #1<<31             @ sign-bit mask
        subs            r2,  r2,  #4
        mov             r3,  r0                 @ mag write pointer
        mov             r12, r1                 @ ang write pointer
        beq             3f                      @ exactly one vector: tail only

        @ Prologue: compute the first vector into q12 (mag') / q11 (ang').
        vld1.32         {d24-d25},[r1,:128]!    @ q12 = ang
        vld1.32         {d22-d23},[r0,:128]!    @ q11 = mag
        vcle.s32        q8,  q12, #0            @ q8  = mask (ang <= 0)
        vand            q9,  q11, q10           @ q9  = sign bit of mag
        veor            q12, q12, q9            @ q12 = t
        vand            q2,  q12, q8            @ q2  = t &  mask
        vbic            q3,  q12, q8            @ q3  = t & ~mask
        vadd.f32        q12, q11, q2            @ q12 = mag'
        vsub.f32        q11, q11, q3            @ q11 = ang'
1:      vld1.32         {d2-d3},  [r1,:128]!    @ q1  = next ang
        vld1.32         {d0-d1},  [r0,:128]!    @ q0  = next mag
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vst1.32         {d24-d25},[r3, :128]!   @ store pending mag'
        vst1.32         {d22-d23},[r12,:128]!   @ store pending ang'
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2            @ q1  = mag'
        vsub.f32        q0,  q0,  q3            @ q0  = ang'
        subs            r2,  r2,  #8
        ble             2f                      @ at most one vector left
        vld1.32         {d24-d25},[r1,:128]!    @ q12 = next ang
        vld1.32         {d22-d23},[r0,:128]!    @ q11 = next mag
        vcle.s32        q8,  q12, #0
        vand            q9,  q11, q10
        veor            q12, q12, q9
        vst1.32         {d2-d3},  [r3, :128]!   @ store pending mag'
        vst1.32         {d0-d1},  [r12,:128]!   @ store pending ang'
        vand            q2,  q12, q8
        vbic            q3,  q12, q8
        vadd.f32        q12, q11, q2
        vsub.f32        q11, q11, q3
        b               1b

        @ Flush the pending result. The flags are still those of the subs
        @ above: "lt" means nothing is left, "eq" means one vector remains.
2:      vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        it              lt
        bxlt            lr

        @ Tail: one vector, loaded without writeback and stored back
        @ through the same r0 / r1.
3:      vld1.32         {d2-d3},  [r1,:128]
        vld1.32         {d0-d1},  [r0,:128]
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2
        vsub.f32        q0,  q0,  q3
        vst1.32         {d2-d3},  [r0,:128]!
        vst1.32         {d0-d1},  [r1,:128]!
        bx              lr
endfunc
@ -0,0 +1,62 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#if HAVE_ALTIVEC_H |
||||||
|
#include <altivec.h> |
||||||
|
#endif |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/ppc/types_altivec.h" |
||||||
|
#include "libavutil/ppc/util_altivec.h" |
||||||
|
#include "libavcodec/vorbisdsp.h" |
||||||
|
|
||||||
|
#if HAVE_ALTIVEC |
||||||
|
static void vorbis_inverse_coupling_altivec(float *mag, float *ang, |
||||||
|
int blocksize) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
vector float m, a; |
||||||
|
vector bool int t0, t1; |
||||||
|
const vector unsigned int v_31 = //XXX
|
||||||
|
vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); |
||||||
|
for (i = 0; i < blocksize; i += 4) { |
||||||
|
m = vec_ld(0, mag+i); |
||||||
|
a = vec_ld(0, ang+i); |
||||||
|
t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); |
||||||
|
t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); |
||||||
|
a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); |
||||||
|
t0 = (vector bool int)vec_and(a, t1); |
||||||
|
t1 = (vector bool int)vec_andc(a, t1); |
||||||
|
a = vec_sub(m, (vector float)t1); |
||||||
|
m = vec_add(m, (vector float)t0); |
||||||
|
vec_stl(a, 0, ang+i); |
||||||
|
vec_stl(m, 0, mag+i); |
||||||
|
} |
||||||
|
} |
||||||
|
#endif /* HAVE_ALTIVEC */ |
||||||
|
|
||||||
|
/**
 * Install the PowerPC-specific Vorbis DSP routines.
 *
 * Does nothing unless the build has AltiVec support (HAVE_ALTIVEC) and the
 * CPU flags report AltiVec at run time.
 *
 * @param c context whose function pointers are updated
 */
void ff_vorbisdsp_init_ppc(VorbisDSPContext *c)
{
#if HAVE_ALTIVEC
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
#endif /* HAVE_ALTIVEC */
}
@ -0,0 +1,33 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "vorbisdsp.h" |
||||||
|
#include "vorbis.h" |
||||||
|
|
||||||
|
/**
 * Populate a VorbisDSPContext.
 *
 * The generic C routine is installed first; the architecture hooks called
 * afterwards may replace it with an optimised version.
 *
 * @param dsp context to initialise
 */
void ff_vorbisdsp_init(VorbisDSPContext *dsp)
{
    dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;

    /* ARCH_* are tested with plain if () rather than #if; presumably they
     * are compile-time 0/1 constants from config.h -- confirm. */
    if (ARCH_X86) {
        ff_vorbisdsp_init_x86(dsp);
    }
    if (ARCH_PPC) {
        ff_vorbisdsp_init_ppc(dsp);
    }
    if (ARCH_ARM) {
        ff_vorbisdsp_init_arm(dsp);
    }
}
@ -0,0 +1,34 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_VORBISDSP_H |
||||||
|
#define AVCODEC_VORBISDSP_H |
||||||
|
|
||||||
|
/**
 * Function table of Vorbis DSP routines.
 */
typedef struct VorbisDSPContext {
    /**
     * Inverse channel coupling; mag[] and ang[] are rewritten in place.
     *
     * Implementations may assume that blocksize is a multiple of 4 and
     * that both arrays are 16-byte aligned.
     *
     * @param mag       magnitude vector
     * @param ang       angle vector
     * @param blocksize number of floats in each array
     */
    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
} VorbisDSPContext;
||||||
|
|
||||||
|
/**
 * Fill in all function pointers of a VorbisDSPContext.
 *
 * @param dsp context to initialise
 */
void ff_vorbisdsp_init(VorbisDSPContext *dsp);

/*
 * Architecture-specific initialisers -- for internal use only.
 * Each one takes the context whose function pointers it may override.
 */
void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp);
void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp);
void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp);
||||||
|
|
||||||
|
#endif /* AVCODEC_VORBISDSP_H */ |
@ -0,0 +1,101 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu> |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavcodec/vorbisdsp.h" |
||||||
|
#include "dsputil_mmx.h" // for ff_pdw_80000000 |
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM |
||||||
|
#if ARCH_X86_32 |
||||||
|
/**
 * 3DNow! version of Vorbis inverse channel coupling.
 *
 * Handles two floats per iteration and rewrites both arrays in place.
 * With a' = a with its sign flipped wherever m >= 0.0:
 *   ang' = m + (a >= 0.0 ? a' : 0)
 *   mag' = m - (a >= 0.0 ? 0  : a')
 *
 * The zero register (mm7) is set up inside the same asm statement that
 * uses it, so the code does not depend on register contents surviving
 * from one asm statement to the next.
 *
 * NOTE(review): the MMX registers written here are not declared as
 * clobbers; only "memory" is.
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of floats in each array; consumed in steps of 2
 */
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
{
    int i;

    for (i = 0; i < blocksize; i += 2) {
        __asm__ volatile (
            "pxor    %%mm7, %%mm7   \n\t" // mm7 = 0.0
            "movq       %0, %%mm0   \n\t" // mm0 = m
            "movq       %1, %%mm1   \n\t" // mm1 = a
            "movq    %%mm0, %%mm2   \n\t"
            "movq    %%mm1, %%mm3   \n\t"
            "pfcmpge %%mm7, %%mm2   \n\t" // mm2 = (m >= 0.0) ? ~0 : 0
            "pfcmpge %%mm7, %%mm3   \n\t" // mm3 = (a >= 0.0) ? ~0 : 0
            "pslld     $31, %%mm2   \n\t" // keep only the sign bit
            "pxor    %%mm2, %%mm1   \n\t" // mm1 = a' (a negated where m >= 0.0)
            "movq    %%mm3, %%mm4   \n\t"
            "pand    %%mm1, %%mm3   \n\t" // mm3 = (a >= 0.0) ? a' : 0
            "pandn   %%mm1, %%mm4   \n\t" // mm4 = (a >= 0.0) ? 0 : a'
            "pfadd   %%mm0, %%mm3   \n\t" // new ang = m + mm3
            "pfsub   %%mm4, %%mm0   \n\t" // new mag = m - mm4
            "movq    %%mm3, %1      \n\t"
            "movq    %%mm0, %0      \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            :: "memory"
        );
    }
    /* Leave MMX state before returning to code that may use the FPU. */
    __asm__ volatile ("femms");
}
||||||
|
#endif |
||||||
|
|
||||||
|
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
|
||||||
|
__asm__ volatile ( |
||||||
|
"movaps %0, %%xmm5 \n\t" |
||||||
|
:: "m"(ff_pdw_80000000[0]) |
||||||
|
); |
||||||
|
for (i = 0; i < blocksize; i += 4) { |
||||||
|
__asm__ volatile ( |
||||||
|
"movaps %0, %%xmm0 \n\t" |
||||||
|
"movaps %1, %%xmm1 \n\t" |
||||||
|
"xorps %%xmm2, %%xmm2 \n\t" |
||||||
|
"xorps %%xmm3, %%xmm3 \n\t" |
||||||
|
"cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
|
||||||
|
"cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
|
||||||
|
"andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
|
||||||
|
"xorps %%xmm2, %%xmm1 \n\t" |
||||||
|
"movaps %%xmm3, %%xmm4 \n\t" |
||||||
|
"andps %%xmm1, %%xmm3 \n\t" |
||||||
|
"andnps %%xmm1, %%xmm4 \n\t" |
||||||
|
"addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
|
||||||
|
"subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
|
||||||
|
"movaps %%xmm3, %1 \n\t" |
||||||
|
"movaps %%xmm0, %0 \n\t" |
||||||
|
: "+m"(mag[i]), "+m"(ang[i]) |
||||||
|
:: "memory" |
||||||
|
); |
||||||
|
} |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
/**
 * Install the x86-specific Vorbis DSP routines.
 *
 * Does nothing unless inline assembly is available (HAVE_INLINE_ASM).
 * The SSE check comes last, so it takes precedence over 3DNow! when the
 * CPU flags report both.
 *
 * @param dsp context whose function pointers are updated
 */
void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
{
#if HAVE_INLINE_ASM
    const int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (cpu_flags & AV_CPU_FLAG_3DNOW) {
        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
    }
#endif /* ARCH_X86_32 */
    if (cpu_flags & AV_CPU_FLAG_SSE) {
        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
    }
#endif /* HAVE_INLINE_ASM */
}
Loading…
Reference in new issue