mirror of https://github.com/FFmpeg/FFmpeg.git
* qatar/master: aacenc: Fix LONG_START windowing. aacenc: Fix a bug where deinterleaved samples were stored in the wrong place. avplay: use the correct array size for stride. lavc: extend doxy for avcodec_alloc_context3(). APIchanges: mention avcodec_alloc_context()/2/3 avcodec_align_dimensions2: set only 4 linesizes, not AV_NUM_DATA_POINTERS. aacsbr: ARM NEON optimised sbrdsp functions aacsbr: align some arrays aacsbr: move some simdable loops to function pointers cosmetics: Remove extra newlines at EOF Conflicts: libavcodec/utils.c libavfilter/formats.c libavutil/mem.c Merged-by: Michael Niedermayer <michaelni@gmx.at>pull/3/merge
commit
c065255bba
104 changed files with 851 additions and 240 deletions
@ -0,0 +1,70 @@ |
||||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/sbrdsp.h" |
||||
|
||||
/* Prototypes of the NEON routines exported by the ARM assembly source. */

/* QMF analysis/synthesis helpers. */
void  ff_sbr_sum64x5_neon(float *z);
float ff_sbr_sum_square_neon(float (*x)[2], int n);
void  ff_sbr_neg_odd_64_neon(float *x);
void  ff_sbr_qmf_pre_shuffle_neon(float *z);
void  ff_sbr_qmf_post_shuffle_neon(float W[32][2], const float *z);
void  ff_sbr_qmf_deint_neg_neon(float *v, const float *src);
void  ff_sbr_qmf_deint_bfly_neon(float *v, const float *src0, const float *src1);

/* High-frequency generation and adjustment. */
void  ff_sbr_hf_g_filt_neon(float (*Y)[2], const float (*X_high)[40][2],
                            const float *g_filt, int m_max, int ixh);
void  ff_sbr_hf_gen_neon(float (*X_high)[2], const float (*X_low)[2],
                         const float alpha0[2], const float alpha1[2],
                         float bw, int start, int end);
void  ff_sbr_autocorrelate_neon(const float x[40][2], float phi[3][2][2]);

/* One noise/sinusoid routine per phase index 0..3. */
void  ff_sbr_hf_apply_noise_0_neon(float Y[64][2], const float *s_m,
                                   const float *q_filt, int noise,
                                   int kx, int m_max);
void  ff_sbr_hf_apply_noise_1_neon(float Y[64][2], const float *s_m,
                                   const float *q_filt, int noise,
                                   int kx, int m_max);
void  ff_sbr_hf_apply_noise_2_neon(float Y[64][2], const float *s_m,
                                   const float *q_filt, int noise,
                                   int kx, int m_max);
void  ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m,
                                   const float *q_filt, int noise,
                                   int kx, int m_max);
||||
|
||||
/**
 * Replace the function pointers in @p s with the NEON implementations.
 *
 * Nothing is touched when HAVE_NEON is zero, so whatever the caller
 * installed beforehand stays in place.
 */
av_cold void ff_sbrdsp_init_arm(SBRDSPContext *s)
{
    if (!HAVE_NEON)
        return;

    /* QMF helpers */
    s->sum64x5          = ff_sbr_sum64x5_neon;
    s->sum_square       = ff_sbr_sum_square_neon;
    s->neg_odd_64       = ff_sbr_neg_odd_64_neon;
    s->qmf_pre_shuffle  = ff_sbr_qmf_pre_shuffle_neon;
    s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_neon;
    s->qmf_deint_neg    = ff_sbr_qmf_deint_neg_neon;
    s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_neon;

    /* HF generation / adjustment */
    s->hf_g_filt        = ff_sbr_hf_g_filt_neon;
    s->hf_gen           = ff_sbr_hf_gen_neon;
    s->autocorrelate    = ff_sbr_autocorrelate_neon;

    /* Noise application, one entry per phase index */
    s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_neon;
    s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_neon;
    s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_neon;
    s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_neon;
}
@ -0,0 +1,411 @@ |
||||
/* |
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "asm.S" |
||||
|
||||
/*
 * void ff_sbr_sum64x5_neon(float *z)
 *
 * In:      r0 = z
 * Effect:  z[k] += z[k+64] + z[k+128] + z[k+192] + z[k+256] for k = 0..63,
 *          four floats per pass, summed left to right.
 * Scratch: r1-r3, r12, q0-q3, q8, flags; lr is saved on the stack because
 *          it serves as the fifth data pointer.
 * Every load/store carries a :128 alignment hint.
 */
function ff_sbr_sum64x5_neon, export=1
        push            {lr}
        add             r1,  r0,  # 64*4        @ r1 = &z[64]
        add             r2,  r0,  #128*4        @ r2 = &z[128]
        add             r3,  r0,  #192*4        @ r3 = &z[192]
        add             lr,  r0,  #256*4        @ lr = &z[256]
        mov             r12, #64                @ floats left to produce
1:
        vld1.32         {q0},     [r0,:128]     @ no writeback: r0 advances on the store
        vld1.32         {q1},     [r1,:128]!
        vadd.f32        q0,  q0,  q1
        vld1.32         {q2},     [r2,:128]!
        vadd.f32        q0,  q0,  q2
        vld1.32         {q3},     [r3,:128]!
        vadd.f32        q0,  q0,  q3
        vld1.32         {q8},     [lr,:128]!
        vadd.f32        q0,  q0,  q8
        vst1.32         {q0},     [r0,:128]!
        subs            r12, #4
        bgt             1b
        pop             {pc}
endfunc
||||
|
||||
/*
 * float ff_sbr_sum_square_neon(float (*x)[2], int n)
 *
 * In:      r0 = x, r1 = n
 * Out:     the sum of the squares of all floats read, left in lane 0 of d0;
 *          the NOVFP-prefixed line additionally copies it to r0.
 * Scratch: r0, r1, q0, q1, flags.
 * The loop body runs before the counter is tested and eats two [2]-pairs
 * (four floats) per pass, so n is effectively rounded up to an even count
 * of at least two.
 */
function ff_sbr_sum_square_neon, export=1
        vmov.f32        q0,  #0.0               @ four partial sums
1:
        vld1.32         {q1},     [r0,:128]!
        vmla.f32        q0,  q1,  q1            @ q0 += q1 * q1, lane by lane
        subs            r1,  r1,  #2            @ two pairs consumed
        bgt             1b
        vadd.f32        d0,  d0,  d1            @ fold 4 lanes -> 2
        vpadd.f32       d0,  d0,  d0            @ fold 2 lanes -> 1
NOVFP   vmov.32         r0,  d0[0]
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_neg_odd_64_neon(float *x)
 *
 * In:      r0 = x
 * Effect:  flips the sign bit of x[1], x[3], ..., x[63] in place.
 * Scratch: r0, r1, q0-q3, q8.
 * vld2 splits each group of eight floats into even-index (first register)
 * and odd-index (second register) elements; only the odd half is XORed
 * with the sign mask before vst2 re-interleaves it. Loads run one group
 * ahead of the stores: 2 + 3*2 = 8 groups of 8 floats in total.
 */
function ff_sbr_neg_odd_64_neon, export=1
        mov             r1,  r0                 @ r1 = store pointer, r0 = load pointer
        vmov.i32        q8,  #1<<31             @ sign-bit mask
        vld2.32         {q0,q1},  [r0,:128]!
        veor            q1,  q1,  q8
        vld2.32         {q2,q3},  [r0,:128]!
    .rept 3
        vst2.32         {q0,q1},  [r1,:128]!
        veor            q3,  q3,  q8
        vld2.32         {q0,q1},  [r0,:128]!
        vst2.32         {q2,q3},  [r1,:128]!
        veor            q1,  q1,  q8
        vld2.32         {q2,q3},  [r0,:128]!
    .endr
        veor            q3,  q3,  q8
        vst2.32         {q0,q1},  [r1,:128]!
        vst2.32         {q2,q3},  [r1,:128]!
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_qmf_pre_shuffle_neon(float *z)
 *
 * In:      r0 = z
 * Effect:  copies z[0], z[1] to z[64], z[65], then fills the floats that
 *          follow with interleaved pairs: a sign-flipped element taken
 *          while walking down from z[63], next to an element taken while
 *          walking up from z[2].
 * Scratch: r0-r3, r12, q0-q3, q8-q10, flags.
 * r0 walks upward, r1 walks downward in 16-byte steps (post-index r3 = -16),
 * r2 is the output pointer starting at z + 64.
 */
function ff_sbr_qmf_pre_shuffle_neon, export=1
        add             r1,  r0,  #60*4         @ r1 = &z[60], descending reader
        add             r2,  r0,  #64*4         @ r2 = &z[64], writer
        vld1.32         {d0},     [r0,:64]!
        vst1.32         {d0},     [r2,:64]!     @ z[64..65] = z[0..1]
        mov             r3,  #-16
        mov             r12, #24                @ loop counter, 8 per pass
        vmov.i32        q8,  #1<<31             @ sign-bit mask
        vld1.32         {q0},     [r1,:128], r3 @ z[60..63]
        vld1.32         {d2},     [r0,:64]!     @ z[2..3]
1:
        vld1.32         {d3,d4},  [r0,:128]!
        vrev64.32       q0,  q0                 @ with the vswp below: reverse 4 floats
        vld1.32         {q9},     [r1,:128], r3
        veor            q0,  q0,  q8            @ negate the descending half
        vld1.32         {d5,d6},  [r0,:128]!
        vswp            d0,  d1
        vrev64.32       q9,  q9
        vst2.32         {q0,q1},  [r2,:64]!     @ interleave (negated, ascending)
        vmov            q10, q2
        veor            q9,  q9,  q8
        vmov            d2,  d6                 @ carry the ascending leftovers over
        vswp            d18, d19
        vld1.32         {q0},     [r1,:128], r3
        vst2.32         {q9,q10}, [r2,:64]!
        subs            r12, r12, #8
        bgt             1b
        @ Tail: the remaining entries, written without over-running the output.
        vld1.32         {d3,d4},  [r0,:128]!
        vrev64.32       q0,  q0
        vld1.32         {q9},     [r1,:128], r3
        veor            q0,  q0,  q8
        vld1.32         {d5},     [r0,:64]!
        vswp            d0,  d1
        vrev64.32       q9,  q9
        vst2.32         {q0,q1},  [r2,:64]!
        vswp            d4,  d5
        veor            q1,  q9,  q8
        vst2.32         {d3,d5},  [r2,:64]!
        vst2.32         {d2[0],d4[0]}, [r2,:64]! @ final single pair
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_qmf_post_shuffle_neon(float W[32][2], const float *z)
 *
 * In:      r0 = W, r1 = z
 * Effect:  W[k][0] = -z[63-k], W[k][1] = z[k] for k = 0..31.
 * Scratch: r0-r3, r12, q0-q3, q8, flags.
 * r1 reads z upward, r2 reads it downward from z + 60 (post-index r3 = -16).
 * Each pass emits eight W entries; the loads for the next half-pass are
 * issued before the stores of the current one.
 */
function ff_sbr_qmf_post_shuffle_neon, export=1
        add             r2,  r1,  #60*4         @ r2 = &z[60]
        mov             r3,  #-16
        mov             r12, #32                @ W entries left
        vmov.i32        q8,  #1<<31             @ sign-bit mask
        vld1.32         {q0},     [r2,:128], r3 @ descending quad
        vld1.32         {q1},     [r1,:128]!    @ ascending quad
1:
        pld             [r2, #-32]
        vrev64.32       q0,  q0                 @ swap floats inside each half
        vswp            d2,  d3                 @ swap halves of the ascending quad
        veor            q0,  q0,  q8            @ negate the descending quad
        vld1.32         {q2},     [r2,:128], r3
        vld1.32         {q3},     [r1,:128]!
        vst2.32         {d1,d3},  [r0,:128]!    @ two W entries
        vst2.32         {d0,d2},  [r0,:128]!    @ two more
        pld             [r2, #-32]
        vrev64.32       q2,  q2
        vswp            d6,  d7
        veor            q2,  q2,  q8
        vld1.32         {q0},     [r2,:128], r3
        vld1.32         {q1},     [r1,:128]!
        vst2.32         {d5,d7},  [r0,:128]!
        vst2.32         {d4,d6},  [r0,:128]!
        subs            r12, r12, #8
        bgt             1b
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_qmf_deint_neg_neon(float *v, const float *src)
 *
 * In:      r0 = v, r1 = src
 * Effect:  walking src downward four floats at a time, the odd-index
 *          elements are written (in reversed order) to the front of v and
 *          the negated even-index elements to the back of v, i.e.
 *          v[i] = src[63-2i], v[63-i] = -src[62-2i] for i = 0..31.
 * Scratch: r0-r3, r12, d0-d2, flags.
 */
function ff_sbr_qmf_deint_neg_neon, export=1
        add             r1,  r1,  #60*4         @ r1 = &src[60], descending reader
        add             r2,  r0,  #62*4         @ r2 = &v[62], descending writer
        mov             r3,  #-16
        mov             r12, #32                @ values of i left
        vmov.i32        d2,  #1<<31             @ sign-bit mask
1:
        vld2.32         {d0,d1},  [r1,:128], r3 @ d0 = even-index, d1 = odd-index floats
        veor            d0,  d0,  d2            @ negate the even-index pair
        vrev64.32       d1,  d1                 @ highest source index first
        vst1.32         {d0},     [r2,:64]
        vst1.32         {d1},     [r0,:64]!
        sub             r2,  r2,  #8
        subs            r12, r12, #2
        bgt             1b
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_qmf_deint_bfly_neon(float *v, const float *src0,
 *                                 const float *src1)
 *
 * In:      r0 = v, r1 = src0, r2 = src1
 * Effect:  v[i]       = src0[i] - src1[63-i]
 *          v[127 - i] = src0[i] + src1[63-i]      for i = 0..63
 * Scratch: r0-r3, r12, q0-q3, flags; lr is saved on the stack because it
 *          holds the -16 post-index step.
 */
function ff_sbr_qmf_deint_bfly_neon, export=1
        push            {lr}
        add             r2,  r2,  #60*4         @ r2 = &src1[60], descending
        add             r3,  r0,  #124*4        @ r3 = &v[124], descending
        mov             r12, #64                @ values of i left
        mov             lr,  #-16
1:
        vld1.32         {q0},     [r1,:128]!    @ four src0 values, ascending
        vld1.32         {q1},     [r2,:128], lr @ four src1 values
        vrev64.32       q2,  q0                 @ pairwise-swapped copies; together
        vrev64.32       q3,  q1                 @ with crossed d-halves = full reverse
        vadd.f32        d3,  d4,  d3            @ sums, laid out for the top of v
        vadd.f32        d2,  d5,  d2
        vsub.f32        d0,  d0,  d7            @ differences, in src0 order
        vsub.f32        d1,  d1,  d6
        vst1.32         {q1},     [r3,:128], lr
        vst1.32         {q0},     [r0,:128]!
        subs            r12, r12, #4
        bgt             1b
        pop             {pc}
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_g_filt_neon(float (*Y)[2], const float (*X_high)[40][2],
 *                            const float *g_filt, int m_max, int ixh)
 *
 * In:      r0 = Y, r1 = X_high, r2 = g_filt, r3 = m_max, [sp] = ixh
 * Effect:  Y[m] = X_high[m][ixh] * g_filt[m] (both floats of the pair).
 * Scratch: r0-r3, r12, q0, q1, q3, flags.
 * Two entries are produced per pass. Inputs for the following pass are
 * fetched before the current store, so the routine reads some g_filt /
 * X_high data past the last entry it actually uses.
 */
function ff_sbr_hf_g_filt_neon, export=1
        ldr             r12, [sp]               @ ixh
        add             r1,  r1,  r12, lsl #3   @ r1 = &X_high[0][ixh]
        mov             r12, #40*2*4            @ byte stride between X_high[m] rows
        sub             r3,  r3,  #1            @ bias so that 0 means one entry left
        vld2.32         {d2[],d3[]},[r2,:64]!   @ d2 = {g0,g0}, d3 = {g1,g1}
        vld1.32         {d0},     [r1,:64], r12
1:
        vld1.32         {d1},     [r1,:64], r12
        vmul.f32        q3,  q0,  q1
        vld2.32         {d2[],d3[]},[r2,:64]!
        vld1.32         {d0},     [r1,:64], r12
        vst1.32         {q3},     [r0,:64]!
        subs            r3,  r3,  #2
        bgt             1b
        it              lt
        bxlt            lr                      @ even count: finished
        vmul.f32        d0,  d0,  d2            @ odd count: one entry remains
        vst1.32         {d0},     [r0,:64]!
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_gen_neon(float (*X_high)[2], const float (*X_low)[2],
 *                         const float alpha0[2], const float alpha1[2],
 *                         float bw, int start, int end)
 *
 * In:      r0 = X_high, r1 = X_low, r2 = alpha0, r3 = alpha1;
 *          bw comes from the stack (NOVFP line) or from d0[0] (VFP line);
 *          start/end are read from [sp + 4*!HAVE_VFP_ARGS].
 * Effect:  for i in [start, end): X_high[i] = X_low[i]
 *              + X_low[i-1] * (alpha0 * bw) + X_low[i-2] * (alpha1 * bw * bw)
 *          using complex multiplication; two i per pass.
 * Scratch: r0-r3, q0-q3, q8-q10, flags.
 * Coefficient layout after the setup below, with
 *   a0,a1 = alpha1[0..1]*bw*bw and a2,a3 = alpha0[0..1]*bw:
 *   d0 = {a0, a2}, d18 = {-a1, +a1}, d1 = {-a3, +a3}
 */
function ff_sbr_hf_gen_neon, export=1
NOVFP   vld1.32         {d1[]},   [sp,:32]
VFP     vdup.32         d1,  d0[0]
        vmul.f32        d0,  d1,  d1            @ d0 = {bw*bw, bw*bw}, d1 = {bw, bw}
        vld1.32         {d3},     [r2,:64]      @ alpha0
        vld1.32         {d2},     [r3,:64]      @ alpha1
        vmul.f32        q0,  q0,  q1            @ d0 = {a0, a1}, d1 = {a2, a3}
        ldrd            r2,  r3,  [sp, #4*!HAVE_VFP_ARGS]
        vtrn.32         d0,  d1                 @ d0 = {a0, a2}, d1 = {a1, a3}
        vneg.f32        d18, d1
        vtrn.32         d18, d1                 @ d18 = {-a1, a1}, d1 = {-a3, a3}
        add             r0,  r0,  r2,  lsl #3   @ &X_high[start]
        add             r1,  r1,  r2,  lsl #3
        sub             r1,  r1,  #2*8          @ &X_low[start - 2]
        sub             r3,  r3,  r2            @ iteration count = end - start
        vld1.32         {q1},     [r1,:128]!    @ q1 = X_low[i-2], X_low[i-1]
1:
        vld1.32         {q3},     [r1,:128]!    @ q3 = X_low[i], X_low[i+1]
        vrev64.32       q2,  q1                 @ re/im-swapped copies
        vmov            q8,  q3                 @ keep the raw inputs for the next pass
        vrev64.32       d20, d3
        vrev64.32       d21, d6
        vmla.f32        q3,  q1,  d0[0]         @ += in[-2] * a0
        vmla.f32        d6,  d4,  d18           @ += swap(in[-2]) * {-a1, a1}
        vmla.f32        d7,  d20, d18
        vmla.f32        d6,  d3,  d0[1]         @ += in[-1] * a2
        vmla.f32        d7,  d16, d0[1]
        vmla.f32        d6,  d5,  d1            @ += swap(in[-1]) * {-a3, a3}
        vmla.f32        d7,  d21, d1
        vmov            q1,  q8
        vst1.32         {q3},     [r0,:128]!
        subs            r3,  r3,  #2
        bgt             1b
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_autocorrelate_neon(const float x[40][2], float phi[3][2][2])
 *
 * In:      r0 = x, r1 = phi
 * Effect:  accumulates products of x with itself and with copies shifted
 *          by one and two entries, then writes (offsets in floats from phi):
 *            [0..3]  <- q3        (phi[0][0][0..1], phi[0][1][0..1])
 *            [4]     <- d20[1]    (phi[1][0][0])
 *            [6..7]  <- d0        (phi[1][1][0..1])
 *            [10]    <- d20[0]    (phi[2][1][0])
 *          These are the same seven-plus-two slots the C reference fills.
 * Scratch: r0, r1, r12, q0-q3, q8-q12, flags.
 * The main loop handles two x entries per pass for 18 passes; the entries
 * at both ends of x are blended in by the straight-line code after it.
 */
function ff_sbr_autocorrelate_neon, export=1
        vld1.32         {q0},     [r0,:128]!    @ x[0], x[1]
        vmov.f32        q1,  #0.0               @ accumulators
        vmov.f32        q3,  #0.0
        vmov.f32        d20, #0.0
        vmul.f32        d21, d1,  d1            @ squares of x[1]
        vmov            q8,  q0                 @ keep x[0], x[1] for the epilogue
        vmov            q11, q0
        mov             r12, #36
1:
        vld1.32         {q2},     [r0,:128]!    @ next two entries
        vrev64.32       q12, q2                 @ re/im-swapped copy
        vmla.f32        q10, q2,  q2            @ energy terms
        vmla.f32        d2,  d1,  d4
        vmla.f32        d3,  d1,  d24
        vmla.f32        d6,  d0,  d4
        vmla.f32        d7,  d0,  d24
        vmla.f32        d2,  d4,  d5
        vmla.f32        d3,  d4,  d25
        vmla.f32        d6,  d1,  d5
        vmla.f32        d7,  d1,  d25
        vmov            q0,  q2
        subs            r12, r12, #2
        bgt             1b
        @ Epilogue: last pair of x plus the boundary corrections.
        vld1.32         {q2},     [r0,:128]!
        vrev64.32       q12, q2
        vmla.f32        d2,  d1,  d4
        vmla.f32        d3,  d1,  d24
        vmla.f32        d6,  d0,  d4
        vmla.f32        d7,  d0,  d24
        vadd.f32        d20, d20, d21
        vrev64.32       d18, d17
        vmla.f32        d6,  d1,  d5
        vmla.f32        d7,  d1,  d25
        vmov            q0,  q1
        vmla.f32        d0,  d16, d17
        vmla.f32        d1,  d16, d18
        vmla.f32        d2,  d4,  d5
        vmla.f32        d3,  d4,  d25
        vneg.f32        s15, s15
        vmov            d21, d20
        vpadd.f32       d0,  d0,  d2
        vpadd.f32       d7,  d6,  d7
        vtrn.32         d1,  d3
        vsub.f32        d6,  d1,  d3
        vmla.f32        d20, d22, d22
        vmla.f32        d21, d4,  d4
        vtrn.32         d0,  d6
        vpadd.f32       d20, d20, d21
        vst1.32         {q3},     [r1,:128]!    @ floats 0..3 of phi
        vst1.32         {d20[1]}, [r1,:32]      @ float 4
        add             r1,  r1,  #2*4
        vst1.32         {d0},     [r1,:64]      @ floats 6..7
        add             r1,  r1,  #4*4
        vst1.32         {d20[0]}, [r1,:32]      @ float 10
        bx              lr
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_apply_noise_0_neon(float Y[64][2], const float *s_m,
 *                                   const float *q_filt, int noise,
 *                                   int kx, int m_max)
 *
 * In:      r0 = Y, r1 = s_m, r2 = q_filt, r3 = noise,
 *          stack: kx (unused here), m_max
 * Effect:  per entry m, with the noise index advanced and wrapped to 9 bits:
 *            s_m[m] != 0:  Y[m][0] += s_m[m] with the sign bit XORed by d3
 *            s_m[m] == 0:  Y[m] += q_filt[m] * ff_sbr_noise_table[index]
 * Scratch: r0-r3, r12, q0-q3, d16, d18, flags; r4/lr saved on the stack.
 *
 * .Lhf_apply_noise_0 is a second entry point (used by the _2 variant) that
 * expects the sign mask for the real part in both lanes of d3.
 *
 * The main loop handles two entries per pass and runs before the counter
 * is tested, so at least two entries are always processed.
 *
 * The single-entry tail keeps q_filt in d18: d3 must still hold the sign
 * mask when s_m is XORed with it, otherwise a non-zero final s_m would be
 * combined with the bit pattern of q_filt instead of a sign.
 */
function ff_sbr_hf_apply_noise_0_neon, export=1
        vmov.i32        d3,  #0                 @ sign mask: keep s_m as is
.Lhf_apply_noise_0:
        push            {r4,lr}
        ldr             r12, [sp, #12]          @ m_max (kx sits at [sp, #8])
        movrel          r4,  X(ff_sbr_noise_table)
        add             r3,  r3,  #1
        bfc             r3,  #9,  #23           @ noise = (noise + 1) & 0x1ff
        sub             r12, r12, #1            @ bias so that 0 means one entry left
1:
        add             lr,  r4,  r3,  lsl #3   @ &ff_sbr_noise_table[noise]
        vld2.32         {q0},     [r0,:64]      @ d0 = two real parts, d1 = two imaginary
        vld2.32         {q3},     [lr,:64]      @ d6/d7 = noise real/imaginary
        vld1.32         {d2},     [r1,:64]!     @ two s_m values
        vld1.32         {d18},    [r2,:64]!     @ two q_filt values
        vceq.f32        d16, d2,  #0            @ mask: lanes where s_m == 0
        veor            d2,  d2,  d3            @ apply the sign
        vmov            q2,  q0
        vmla.f32        d0,  d6,  d18           @ noise path
        vmla.f32        d1,  d7,  d18
        vadd.f32        d4,  d4,  d2            @ sinusoid path (real part only)
        add             r3,  r3,  #2
        bfc             r3,  #9,  #23
        vbif            d0,  d4,  d16           @ s_m != 0: take the sinusoid result
        vbif            d1,  d5,  d16
        vst2.32         {q0},     [r0,:64]!
        subs            r12, r12, #2
        bgt             1b
        blt             2f                      @ even count: finished
        @ Odd count: one entry left, handled as {re, im} in a single d register.
        add             lr,  r4,  r3,  lsl #3
        vld1.32         {d0},     [r0,:64]
        vld1.32         {d6},     [lr,:64]
        vld1.32         {d2[]},   [r1,:32]!     @ s_m in both lanes
        vld1.32         {d18[]},  [r2,:32]!     @ q_filt in both lanes; d3 stays intact
        vceq.f32        d4,  d2,  #0
        veor            d2,  d2,  d3            @ apply the sign
        vmov            d1,  d0
        vmla.f32        d0,  d6,  d18           @ noise path
        vadd.f32        s2,  s2,  s4            @ sinusoid path: real += signed s_m
        vbif            d0,  d1,  d4
        vst1.32         {d0},     [r0,:64]!
2:
        pop             {r4,pc}
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_apply_noise_1_neon(float Y[64][2], const float *s_m,
 *                                   const float *q_filt, int noise,
 *                                   int kx, int m_max)
 *
 * In:      r0 = Y, r1 = s_m, r2 = q_filt, r3 = noise, stack: kx, m_max
 * Effect:  per entry m, with the noise index advanced and wrapped to 9 bits:
 *            s_m[m] != 0:  Y[m][1] += s_m[m] with an alternating sign
 *            s_m[m] == 0:  Y[m] += q_filt[m] * ff_sbr_noise_table[index]
 *          The sign for even m is negative when kx is odd; odd m uses the
 *          opposite sign.
 * Scratch: r0-r3, r12, q0-q3, d16, d18, flags; r4/lr saved on the stack.
 *
 * .Lhf_apply_noise_1 is a second entry point (used by the _3 variant) that
 * expects r4/lr already pushed and d3 = {sign for even m, sign for odd m}.
 *
 * The main loop handles two entries per pass and runs before the counter
 * is tested, so at least two entries are always processed.
 *
 * The single-entry tail is only reached when m_max is odd, so the entry it
 * handles has an even index and must use the even-index sign, which lives
 * in lane 0 of d2 (s4) after the XOR with d3.
 */
function ff_sbr_hf_apply_noise_1_neon, export=1
        ldr             r12, [sp]               @ kx
        push            {r4,lr}
        lsl             r12, r12, #31           @ sign bit set when kx is odd
        eor             lr,  r12, #1<<31        @ the opposite sign
        vmov            d3,  r12, lr            @ d3 = {even-m sign, odd-m sign}
.Lhf_apply_noise_1:
        ldr             r12, [sp, #12]          @ m_max
        movrel          r4,  X(ff_sbr_noise_table)
        add             r3,  r3,  #1
        bfc             r3,  #9,  #23           @ noise = (noise + 1) & 0x1ff
        sub             r12, r12, #1            @ bias so that 0 means one entry left
1:
        add             lr,  r4,  r3,  lsl #3   @ &ff_sbr_noise_table[noise]
        vld2.32         {q0},     [r0,:64]      @ d0 = two real parts, d1 = two imaginary
        vld2.32         {q3},     [lr,:64]      @ d6/d7 = noise real/imaginary
        vld1.32         {d2},     [r1,:64]!     @ two s_m values
        vld1.32         {d18},    [r2,:64]!     @ two q_filt values
        vceq.f32        d16, d2,  #0            @ mask: lanes where s_m == 0
        veor            d2,  d2,  d3            @ apply the per-lane sign
        vmov            q2,  q0
        vmla.f32        d0,  d6,  d18           @ noise path
        vmla.f32        d1,  d7,  d18
        vadd.f32        d5,  d5,  d2            @ sinusoid path (imaginary part only)
        add             r3,  r3,  #2
        bfc             r3,  #9,  #23
        vbif            d0,  d4,  d16           @ s_m != 0: take the sinusoid result
        vbif            d1,  d5,  d16
        vst2.32         {q0},     [r0,:64]!
        subs            r12, r12, #2
        bgt             1b
        blt             2f                      @ even count: finished
        @ Odd count: one even-index entry left, held as {re, im} in one d register.
        add             lr,  r4,  r3,  lsl #3
        vld1.32         {d0},     [r0,:64]
        vld1.32         {d6},     [lr,:64]
        vld1.32         {d2[]},   [r1,:32]!     @ s_m in both lanes
        vld1.32         {d18[]},  [r2,:32]!     @ q_filt in both lanes
        vceq.f32        d4,  d2,  #0
        veor            d2,  d2,  d3            @ d2 = {s_m ^ even sign, s_m ^ odd sign}
        vmov            d1,  d0
        vmla.f32        d0,  d6,  d18           @ noise path
        vadd.f32        s3,  s3,  s4            @ imaginary += s_m with the even-m sign
        vbif            d0,  d1,  d4
        vst1.32         {d0},     [r0,:64]!
2:
        pop             {r4,pc}
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_apply_noise_2_neon(float Y[64][2], const float *s_m,
 *                                   const float *q_filt, int noise,
 *                                   int kx, int m_max)
 *
 * Same argument registers as the _0 variant. Sets the sign-bit mask in
 * both lanes of d3 and continues at .Lhf_apply_noise_0, which saves the
 * registers itself.
 */
function ff_sbr_hf_apply_noise_2_neon, export=1
        vmov.i32        d3,  #1<<31             @ flip the sign of s_m
        b               .Lhf_apply_noise_0
endfunc
||||
|
||||
/*
 * void ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m,
 *                                   const float *q_filt, int noise,
 *                                   int kx, int m_max)
 *
 * Same argument registers as the _1 variant, with the two sign lanes of d3
 * exchanged: lane 0 gets the sign bit when kx is even, lane 1 when kx is
 * odd. r4/lr are pushed here because .Lhf_apply_noise_1 lies after the
 * push of the _1 variant.
 */
function ff_sbr_hf_apply_noise_3_neon, export=1
        ldr             r12, [sp]               @ kx
        push            {r4,lr}
        lsl             r12, r12, #31           @ sign bit set when kx is odd
        eor             lr,  r12, #1<<31        @ sign bit set when kx is even
        vmov            d3,  lr,  r12           @ d3 = {lr, r12}
        b               .Lhf_apply_noise_1
endfunc
@ -0,0 +1,241 @@ |
||||
/*
|
||||
* AAC Spectral Band Replication decoding functions |
||||
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl ) |
||||
* Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "sbrdsp.h" |
||||
|
||||
/**
 * Add the four 64-float blocks that follow z[0..63] onto z[0..63].
 *
 * @param z buffer of at least 320 floats; only the first 64 are written
 */
static void sbr_sum64x5_c(float *z)
{
    float *dst             = z;
    const float *const end = z + 64;

    for (; dst < end; dst++)
        *dst = dst[0] + dst[64] + dst[128] + dst[192] + dst[256];
}
||||
|
||||
/**
 * Sum of the squared real and imaginary parts of the first n entries of x.
 *
 * @param x array of {re, im} pairs
 * @param n number of pairs to read; n <= 0 yields 0
 * @return  the accumulated sum
 */
static float sbr_sum_square_c(float (*x)[2], int n)
{
    float acc = 0.0f;
    int k     = 0;

    while (k < n) {
        acc += x[k][0] * x[k][0] + x[k][1] * x[k][1];
        k++;
    }

    return acc;
}
||||
|
||||
/**
 * Negate the odd-indexed elements x[1], x[3], ..., x[63] in place.
 */
static void sbr_neg_odd_64_c(float *x)
{
    int pair;

    for (pair = 0; pair < 32; pair++) {
        float *odd = &x[2 * pair + 1];
        *odd = -*odd;
    }
}
||||
|
||||
/**
 * Build the 64-float block z[64..127] from z[0..63].
 *
 * z[64], z[65] are copies of z[0], z[1]; every following pair holds
 * -z[64 - k] and z[k + 1] for k = 1..31.
 */
static void sbr_qmf_pre_shuffle_c(float *z)
{
    float *const out = z + 64;
    int k;

    out[0] = z[0];
    out[1] = z[1];

    for (k = 1; k < 32; k++) {
        out[2 * k]     = -z[64 - k];
        out[2 * k + 1] =  z[k + 1];
    }
}
||||
|
||||
/**
 * Pair the mirrored, negated upper half of z with its lower half:
 * W[k] = { -z[63 - k], z[k] } for k = 0..31.
 */
static void sbr_qmf_post_shuffle_c(float W[32][2], const float *z)
{
    const float *fwd = z;
    const float *bwd = z + 63;
    int k;

    for (k = 0; k < 32; k++) {
        W[k][0] = -bwd[-k];
        W[k][1] =  fwd[k];
    }
}
||||
|
||||
/**
 * Split src[0..63] by index parity while walking it backwards:
 * odd-index values go to v[0..31], negated even-index values go to
 * v[63..32].
 */
static void sbr_qmf_deint_neg_c(float *v, const float *src)
{
    int i;

    for (i = 0; i < 32; i++) {
        const int odd = 63 - 2 * i;     /* 63, 61, ..., 1 */

        v[i]      =  src[odd];
        v[63 - i] = -src[odd - 1];
    }
}
||||
|
||||
/**
 * Butterfly of src0 with the reversed src1:
 * v[i] receives the difference, v[127 - i] the sum, for i = 0..63.
 */
static void sbr_qmf_deint_bfly_c(float *v, const float *src0, const float *src1)
{
    float *lo = v;
    float *hi = v + 127;
    int i;

    for (i = 0; i < 64; i++) {
        lo[i]  = src0[i] - src1[63 - i];
        hi[-i] = src0[i] + src1[63 - i];
    }
}
||||
|
||||
/**
 * Fill the entries of phi that belong to one lag of the autocorrelation
 * of x.
 *
 * lag == 0 writes phi[2][1][0] and phi[1][0][0] (real sums only);
 * lag == 1 writes phi[1][1][0..1] and phi[0][0][0..1];
 * lag == 2 writes phi[0][1][0..1].
 * The shared part of each sum runs over x[1..37]; the entries written
 * differ only in which boundary term is added.
 */
static av_always_inline void autocorrelate(const float x[40][2],
                                           float phi[3][2][2], int lag)
{
    float re = 0.0f;
    float im = 0.0f;
    int n;

    if (!lag) {
        for (n = 1; n < 38; n++) {
            re += x[n][0] * x[n][0] + x[n][1] * x[n][1];
        }
        phi[2][1][0] = re + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1];
        phi[1][0][0] = re + x[38][0] * x[38][0] + x[38][1] * x[38][1];
        return;
    }

    for (n = 1; n < 38; n++) {
        re += x[n][0] * x[n+lag][0] + x[n][1] * x[n+lag][1];
        im += x[n][0] * x[n+lag][1] - x[n][1] * x[n+lag][0];
    }

    /* Variant that includes the first entry of x. */
    phi[2-lag][1][0] = re + x[ 0][0] * x[lag][0] + x[ 0][1] * x[lag][1];
    phi[2-lag][1][1] = im + x[ 0][0] * x[lag][1] - x[ 0][1] * x[lag][0];

    /* Variant that includes the x[38]/x[39] pair, needed for lag 1 only. */
    if (lag == 1) {
        phi[0][0][0] = re + x[38][0] * x[39][0] + x[38][1] * x[39][1];
        phi[0][0][1] = im + x[38][0] * x[39][1] - x[38][1] * x[39][0];
    }
}
||||
|
||||
/**
 * Compute the autocorrelation entries of x for lags 0, 1 and 2, in that
 * order, storing them in phi.
 */
static void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2])
{
    int lag;

    for (lag = 0; lag <= 2; lag++)
        autocorrelate(x, phi, lag);
}
||||
|
||||
/**
 * Generate X_high[start..end-1] from X_low with a two-tap complex
 * predictor.
 *
 * Each output is X_low[i] plus X_low[i-1] times (alpha0 * bw) plus
 * X_low[i-2] times (alpha1 * bw * bw), all products being complex.
 * X_low must therefore be readable from index start - 2.
 */
static void sbr_hf_gen_c(float (*X_high)[2], const float (*X_low)[2],
                         const float alpha0[2], const float alpha1[2],
                         float bw, int start, int end)
{
    /* Chirped coefficients: c2_* applies to X_low[i-2], c1_* to X_low[i-1]. */
    const float c2_re = alpha1[0] * bw * bw;
    const float c2_im = alpha1[1] * bw * bw;
    const float c1_re = alpha0[0] * bw;
    const float c1_im = alpha0[1] * bw;
    int i = start;

    while (i < end) {
        X_high[i][0] =
            X_low[i - 2][0] * c2_re -
            X_low[i - 2][1] * c2_im +
            X_low[i - 1][0] * c1_re -
            X_low[i - 1][1] * c1_im +
            X_low[i][0];
        X_high[i][1] =
            X_low[i - 2][1] * c2_re +
            X_low[i - 2][0] * c2_im +
            X_low[i - 1][1] * c1_re +
            X_low[i - 1][0] * c1_im +
            X_low[i][1];
        i++;
    }
}
||||
|
||||
/**
 * Scale column ixh of X_high by the per-row gain:
 * Y[m] = X_high[m][ixh] * g_filt[m] for m = 0..m_max-1.
 */
static void sbr_hf_g_filt_c(float (*Y)[2], const float (*X_high)[40][2],
                            const float *g_filt, int m_max, int ixh)
{
    int m = 0;

    while (m < m_max) {
        const float *in = X_high[m][ixh];

        Y[m][0] = in[0] * g_filt[m];
        Y[m][1] = in[1] * g_filt[m];
        m++;
    }
}
||||
|
||||
static av_always_inline void sbr_hf_apply_noise(float (*Y)[2], |
||||
const float *s_m, |
||||
const float *q_filt, |
||||
int noise, |
||||
float phi_sign0, |
||||
float phi_sign1, |
||||
int m_max) |
||||
{ |
||||
int m; |
||||
|
||||
for (m = 0; m < m_max; m++) { |
||||
float y0 = Y[m][0]; |
||||
float y1 = Y[m][1]; |
||||
noise = (noise + 1) & 0x1ff; |
||||
if (s_m[m]) { |
||||
y0 += s_m[m] * phi_sign0; |
||||
y1 += s_m[m] * phi_sign1; |
||||
} else { |
||||
y0 += q_filt[m] * ff_sbr_noise_table[noise][0]; |
||||
y1 += q_filt[m] * ff_sbr_noise_table[noise][1]; |
||||
} |
||||
Y[m][0] = y0; |
||||
Y[m][1] = y1; |
||||
phi_sign1 = -phi_sign1; |
||||
} |
||||
} |
||||
|
||||
/** Phase 0: sinusoids are added to the real part with sign +1; kx is unused. */
static void sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m,
                                 const float *q_filt, int noise,
                                 int kx, int m_max)
{
    const float sign_re = 1.0f;
    const float sign_im = 0.0f;

    sbr_hf_apply_noise(Y, s_m, q_filt, noise, sign_re, sign_im, m_max);
}
||||
|
||||
/**
 * Phase 1: sinusoids are added to the imaginary part; the starting sign
 * is +1 for even kx and -1 for odd kx.
 */
static void sbr_hf_apply_noise_1(float (*Y)[2], const float *s_m,
                                 const float *q_filt, int noise,
                                 int kx, int m_max)
{
    const float sign_im = (kx & 1) ? -1.0f : 1.0f;

    sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0f, sign_im, m_max);
}
||||
|
||||
/** Phase 2: sinusoids are added to the real part with sign -1; kx is unused. */
static void sbr_hf_apply_noise_2(float (*Y)[2], const float *s_m,
                                 const float *q_filt, int noise,
                                 int kx, int m_max)
{
    const float sign_re = -1.0f;
    const float sign_im =  0.0f;

    sbr_hf_apply_noise(Y, s_m, q_filt, noise, sign_re, sign_im, m_max);
}
||||
|
||||
/**
 * Phase 3: sinusoids are added to the imaginary part; the starting sign
 * is -1 for even kx and +1 for odd kx.
 */
static void sbr_hf_apply_noise_3(float (*Y)[2], const float *s_m,
                                 const float *q_filt, int noise,
                                 int kx, int m_max)
{
    const float sign_im = (kx & 1) ? 1.0f : -1.0f;

    sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0f, sign_im, m_max);
}
||||
|
||||
/**
 * Fill @p s with the C implementations, then give the ARM initialiser a
 * chance to override them when ARCH_ARM is set.
 */
av_cold void ff_sbrdsp_init(SBRDSPContext *s)
{
    /* QMF helpers */
    s->sum64x5          = sbr_sum64x5_c;
    s->sum_square       = sbr_sum_square_c;
    s->neg_odd_64       = sbr_neg_odd_64_c;
    s->qmf_pre_shuffle  = sbr_qmf_pre_shuffle_c;
    s->qmf_post_shuffle = sbr_qmf_post_shuffle_c;
    s->qmf_deint_neg    = sbr_qmf_deint_neg_c;
    s->qmf_deint_bfly   = sbr_qmf_deint_bfly_c;

    /* HF generation / adjustment */
    s->hf_g_filt        = sbr_hf_g_filt_c;
    s->hf_gen           = sbr_hf_gen_c;
    s->autocorrelate    = sbr_autocorrelate_c;

    /* Noise application, one entry per phase index */
    s->hf_apply_noise[0] = sbr_hf_apply_noise_0;
    s->hf_apply_noise[1] = sbr_hf_apply_noise_1;
    s->hf_apply_noise[2] = sbr_hf_apply_noise_2;
    s->hf_apply_noise[3] = sbr_hf_apply_noise_3;

    if (ARCH_ARM)
        ff_sbrdsp_init_arm(s);
}
@ -0,0 +1,48 @@ |
||||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef LIBAVCODEC_SBRDSP_H |
||||
#define LIBAVCODEC_SBRDSP_H |
||||
|
||||
/**
 * Table of the SBR signal-processing primitives.
 *
 * ff_sbrdsp_init() fills every member; architecture-specific initialisers
 * may then replace individual entries.
 */
typedef struct SBRDSPContext {
    /** z[k] += z[k+64] + z[k+128] + z[k+192] + z[k+256] for k = 0..63. */
    void (*sum64x5)(float *z);

    /** Sum of squares over n {re, im} pairs. */
    float (*sum_square)(float (*x)[2], int n);

    /** Negate the odd-indexed elements of a 64-float array. */
    void (*neg_odd_64)(float *x);

    /** Build z[64..127] from z[0..63]. */
    void (*qmf_pre_shuffle)(float *z);

    /** W[k] = { -z[63-k], z[k] } for k = 0..31. */
    void (*qmf_post_shuffle)(float W[32][2], const float *z);

    /** Parity split of src into v, negating the even-index half. */
    void (*qmf_deint_neg)(float *v, const float *src);

    /** v[i] = src0[i] - src1[63-i], v[127-i] = src0[i] + src1[63-i]. */
    void (*qmf_deint_bfly)(float *v, const float *src0, const float *src1);

    /** Autocorrelation entries of x for lags 0..2. */
    void (*autocorrelate)(const float x[40][2], float phi[3][2][2]);

    /** Two-tap complex prediction of X_high[start..end-1] from X_low. */
    void (*hf_gen)(float (*X_high)[2], const float (*X_low)[2],
                   const float alpha0[2], const float alpha1[2],
                   float bw, int start, int end);

    /** Y[m] = X_high[m][ixh] * g_filt[m] for m = 0..m_max-1. */
    void (*hf_g_filt)(float (*Y)[2], const float (*X_high)[40][2],
                      const float *g_filt, int m_max, int ixh);

    /** Sinusoid / noise addition, one routine per phase index 0..3. */
    void (*hf_apply_noise[4])(float (*Y)[2], const float *s_m,
                              const float *q_filt, int noise,
                              int kx, int m_max);
} SBRDSPContext;
||||
|
||||
extern const float ff_sbr_noise_table[][2]; |
||||
|
||||
void ff_sbrdsp_init(SBRDSPContext *s); |
||||
void ff_sbrdsp_init_arm(SBRDSPContext *s); |
||||
|
||||
#endif |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue