Merge remote-tracking branch 'qatar/master'

* qatar/master:
  dcadsp: split lfe_dir cases

Conflicts:
	libavcodec/arm/dcadsp_init_arm.c

See: 45854df9a5
Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/43/merge
Michael Niedermayer 11 years ago
commit 5794e9fce2
  1. 46
      libavcodec/arm/dcadsp_init_arm.c
  2. 18
      libavcodec/arm/dcadsp_neon.S
  3. 32
      libavcodec/arm/dcadsp_vfp.S

@ -24,16 +24,22 @@
#include "libavutil/attributes.h"
#include "libavcodec/dcadsp.h"
/* LFE FIR routine (VFP) that receives the decimation factor as an argument. */
void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
int decifactor, float scale);
/* NEON LFE FIR entry points without a decifactor argument; the assembly
 * entry for fir0 loads 32 and the one for fir1 loads 64 before the shared
 * filter body. */
void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs,
float scale);
void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs,
float scale);
/* VFP LFE FIR entry points without a decifactor argument; the assembly
 * generates one function per factor (32 and 64) from the dca_lfe_fir macro. */
void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs,
float scale);
void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs,
float scale);
/* 32-subband QMF routine (VFP). Its implementation is not visible here;
 * NOTE(review): confirm buffer-size expectations against the assembly. */
void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
SynthFilterContext *synth, FFTContext *imdct,
float synth_buf_ptr[512],
int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out,
float raXin[32], float scale);
/* LFE FIR routine (NEON) that receives the decimation factor as an argument. */
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale);
void ff_synth_filter_float_vfp(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
@ -47,42 +53,18 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
/* Forward to the VFP LFE FIR routine with the decimation factor fixed at 32. */
static void lfe_fir0_vfp(float *out, const float *in, const float *coefs,
                         float scale)
{
    const int decifactor = 32;

    ff_dca_lfe_fir_vfp(out, in, coefs, decifactor, scale);
}
/* Forward to the VFP LFE FIR routine with the decimation factor fixed at 64. */
static void lfe_fir1_vfp(float *out, const float *in, const float *coefs,
                         float scale)
{
    const int decifactor = 64;

    ff_dca_lfe_fir_vfp(out, in, coefs, decifactor, scale);
}
/* Forward to the NEON LFE FIR routine with the decimation factor fixed at 32. */
static void lfe_fir0_neon(float *out, const float *in, const float *coefs,
                          float scale)
{
    const int decifactor = 32;

    ff_dca_lfe_fir_neon(out, in, coefs, decifactor, scale);
}
/* Forward to the NEON LFE FIR routine with the decimation factor fixed at 64. */
static void lfe_fir1_neon(float *out, const float *in, const float *coefs,
                          float scale)
{
    const int decifactor = 64;

    ff_dca_lfe_fir_neon(out, in, coefs, decifactor, scale);
}
/*
 * Fill in the ARM-specific entries of a DCADSPContext according to the
 * CPU flags reported by av_get_cpu_flags().
 *
 * s: context whose lfe_fir[] and qmf_32_subbands pointers are updated.
 *
 * The VFP entries are installed only when VFP is present and VFPv3 is not.
 * The NEON check runs afterwards, so when both conditions hold the NEON
 * lfe_fir[] entries replace the VFP ones while qmf_32_subbands keeps the
 * VFP routine.
 */
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
        /* Index 0 takes the decifactor-32 routine, index 1 the 64 one. */
        s->lfe_fir[0]      = ff_dca_lfe_fir32_vfp;
        s->lfe_fir[1]      = ff_dca_lfe_fir64_vfp;
        s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
    }
    if (have_neon(cpu_flags)) {
        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
    }
}

@ -20,17 +20,23 @@
#include "libavutil/arm/asm.S"
function ff_dca_lfe_fir_neon, export=1
function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr}
NOVFP vmov s0, r3 @ scale
mov r3, #32 @ decifactor
mov r6, #256/32
b dca_lfe_fir
endfunc
function ff_dca_lfe_fir1_neon, export=1
push {r4-r6,lr}
NOVFP vmov s0, r3 @ scale
mov r3, #64 @ decifactor
mov r6, #256/64
dca_lfe_fir:
add r4, r0, r3, lsl #2 @ out2
add r5, r2, #256*4-16 @ cf1
sub r1, r1, #12
cmp r3, #32
ite eq
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr s0, [sp, #16] @ scale
mov lr, #-16
1:
vmov.f32 q2, #0.0 @ v0

@ -24,7 +24,6 @@
POUT .req a1
PIN .req a2
PCOEF .req a3
DECIFACTOR .req a4
OLDFPSCR .req a4
COUNTER .req ip
@ -129,6 +128,15 @@ POST3 .req s27
.endm
.macro dca_lfe_fir decifactor
function ff_dca_lfe_fir\decifactor\()_vfp, export=1
NOVFP vmov s0, r3
fmrx OLDFPSCR, FPSCR
ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
fmxr FPSCR, ip
vldr IN0, [PIN, #-0*4]
vldr IN1, [PIN, #-1*4]
vldr IN2, [PIN, #-2*4]
vldr IN3, [PIN, #-3*4]
.if \decifactor == 32
.set JMAX, 8
vpush {s16-s31}
@ -165,32 +173,16 @@ POST3 .req s27
.endif
fmxr FPSCR, OLDFPSCR
bx lr
endfunc
.endm
/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
* int decifactor, float scale)
*/
function ff_dca_lfe_fir_vfp, export=1
teq DECIFACTOR, #32
fmrx OLDFPSCR, FPSCR
ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
fmxr FPSCR, ip
NOVFP vldr s0, [sp]
vldr IN0, [PIN, #-0*4]
vldr IN1, [PIN, #-1*4]
vldr IN2, [PIN, #-2*4]
vldr IN3, [PIN, #-3*4]
beq 32f
64: dca_lfe_fir 64
dca_lfe_fir 64
.ltorg
32: dca_lfe_fir 32
endfunc
dca_lfe_fir 32
.unreq POUT
.unreq PIN
.unreq PCOEF
.unreq DECIFACTOR
.unreq OLDFPSCR
.unreq COUNTER

Loading…
Cancel
Save