mirror of https://github.com/FFmpeg/FFmpeg.git
* qatar/master: (28 commits) dfa: use more meaningful return codes eatgv: check vector_bits eatgv: check motion vectors Mark a number of variables only used in av_dlog() calls as av_unused. dvdec: drop const qualifier from variable to eliminate a warning avcodec: Improve comment for thread_safe_callbacks to avoid misinterpretation. tests/utils: don't ignore the return value of fwrite() lavfi/formats: use sizeof(var) instead of sizeof(type). lavfi: remove avfilter_default_config_input_link() declaration lavfi: always enable the scale filter and depend on sws. vf_split: support user-specifiable number of outputs. avconv: remove stray useless comment. mpegmux: add stuffing to avoid incomplete PCM frames rtsp: avoid const warnings from strtol() call avserver: check return value of ftruncate() lagarith: make offset array type unsigned dfa: add some checks to ensure that decoder won't write past frame end aacps: NEON optimisations aacps: align some arrays aacps: move some loops to function pointers ... Conflicts: configure doc/filters.texi libavcodec/dfa.c libavcodec/eatgv.c libavfilter/Makefile libavfilter/allfilters.c libavfilter/avfilter.h libavfilter/formats.c libavfilter/vf_split.c Merged-by: Michael Niedermayer <michaelni@gmx.at>pull/30/merge
commit
715c8a5a50
30 changed files with 991 additions and 386 deletions
@ -0,0 +1,214 @@ |
||||
/*
|
||||
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "aacpsdsp.h" |
||||
|
||||
/**
 * Accumulate the squared magnitude of complex samples into dst.
 *
 * For every k in [0, n): dst[k] += src[k][0]^2 + src[k][1]^2.
 *
 * @param dst  array of at least n floats, updated in place
 * @param src  n complex samples stored as {re, im} pairs
 * @param n    number of samples; the loop does not run when n <= 0
 */
static void ps_add_squares_c(float *dst, const float (*src)[2], int n)
{
    int k;

    for (k = 0; k < n; k++) {
        const float re = src[k][0];
        const float im = src[k][1];

        dst[k] += re * re + im * im;
    }
}
||||
|
||||
/**
 * Scale complex samples by a per-sample real factor.
 *
 * For every k in [0, n) both components of src0[k] are multiplied by
 * src1[k] and written to dst[k].
 *
 * @param dst   output, n {re, im} pairs
 * @param src0  input, n {re, im} pairs
 * @param src1  n real factors
 * @param n     number of samples; the loop does not run when n <= 0
 */
static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2], float *src1,
                                 int n)
{
    int k;

    for (k = 0; k < n; k++) {
        /* src1[k] is deliberately re-read for each component, exactly as
         * the accesses were ordered before. */
        dst[k][0] = src0[k][0] * src1[k];
        dst[k][1] = src0[k][1] * src1[k];
    }
}
||||
|
||||
/**
 * Apply n complex filters to one window of 13 complex input samples.
 *
 * The taps are applied symmetrically: in[t] and in[12 - t] share
 * filter[b][t] for t = 0..5, and the centre sample in[6] is scaled by the
 * real coefficient filter[b][6][0] only. filter[b][7] is never read here.
 *
 * @param out     output; result b is written to out[b * stride]
 * @param in      13 complex input samples ({re, im} pairs)
 * @param filter  n sets of 8 complex coefficients
 * @param stride  distance, in complex elements, between consecutive outputs
 * @param n       number of filters to evaluate
 */
static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2],
                                 const float (*filter)[8][2],
                                 int stride, int n)
{
    int band, tap;

    for (band = 0; band < n; band++) {
        /* Centre tap contributes first so the accumulation order is kept. */
        float sum_re = filter[band][6][0] * in[6][0];
        float sum_im = filter[band][6][0] * in[6][1];

        for (tap = 0; tap < 6; tap++) {
            const float coef_re = filter[band][tap][0];
            const float coef_im = filter[band][tap][1];
            const float in0_re  = in[tap][0];
            const float in0_im  = in[tap][1];
            const float in1_re  = in[12 - tap][0];
            const float in1_im  = in[12 - tap][1];

            sum_re += coef_re * (in0_re + in1_re) -
                      coef_im * (in0_im - in1_im);
            sum_im += coef_re * (in0_im + in1_im) +
                      coef_im * (in0_re - in1_re);
        }

        out[band * stride][0] = sum_re;
        out[band * stride][1] = sum_im;
    }
}
||||
|
||||
/**
 * Copy two planar arrays into interleaved {L[0], L[1]} pairs.
 *
 * For every band b in [i, 64) and every slot t in [0, len):
 *   out[b][t][0] = L[0][t][b];
 *   out[b][t][1] = L[1][t][b];
 *
 * @param out  destination, indexed [band][slot][component]
 * @param L    source, indexed [component][slot][band]
 * @param i    first band to copy; bands below i are left untouched
 * @param len  number of slots to copy per band
 */
static void ps_hybrid_analysis_ileave_c(float (*out)[32][2], float L[2][38][64],
                                        int i, int len)
{
    int slot;

    for (; i < 64; i++) {
        for (slot = 0; slot < len; slot++) {
            out[i][slot][0] = L[0][slot][i];
            out[i][slot][1] = L[1][slot][i];
        }
    }
}
||||
|
||||
/**
 * Split interleaved pairs into two planar arrays.
 *
 * For every band b in [i, 64) and every slot t in [0, len):
 *   out[0][t][b] = in[b][t][0];
 *   out[1][t][b] = in[b][t][1];
 *
 * @param out  destination, indexed [component][slot][band]
 * @param in   source, indexed [band][slot][component]
 * @param i    first band to copy; bands below i are left untouched
 * @param len  number of slots to copy per band
 */
static void ps_hybrid_synthesis_deint_c(float out[2][38][64],
                                        float (*in)[32][2],
                                        int i, int len)
{
    int slot;

    for (; i < 64; i++) {
        for (slot = 0; slot < len; slot++) {
            out[0][slot][i] = in[i][slot][0];
            out[1][slot][i] = in[i][slot][1];
        }
    }
}
||||
|
||||
static void ps_decorrelate_c(float (*out)[2], float (*delay)[2], |
||||
float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2], |
||||
const float phi_fract[2], float (*Q_fract)[2], |
||||
const float *transient_gain, |
||||
float g_decay_slope, |
||||
int len) |
||||
{ |
||||
static const float a[] = { 0.65143905753106f, |
||||
0.56471812200776f, |
||||
0.48954165955695f }; |
||||
float ag[PS_AP_LINKS]; |
||||
int m, n; |
||||
|
||||
for (m = 0; m < PS_AP_LINKS; m++) |
||||
ag[m] = a[m] * g_decay_slope; |
||||
|
||||
for (n = 0; n < len; n++) { |
||||
float in_re = delay[n][0] * phi_fract[0] - delay[n][1] * phi_fract[1]; |
||||
float in_im = delay[n][0] * phi_fract[1] + delay[n][1] * phi_fract[0]; |
||||
for (m = 0; m < PS_AP_LINKS; m++) { |
||||
float a_re = ag[m] * in_re; |
||||
float a_im = ag[m] * in_im; |
||||
float link_delay_re = ap_delay[m][n+2-m][0]; |
||||
float link_delay_im = ap_delay[m][n+2-m][1]; |
||||
float fractional_delay_re = Q_fract[m][0]; |
||||
float fractional_delay_im = Q_fract[m][1]; |
||||
float apd_re = in_re; |
||||
float apd_im = in_im; |
||||
in_re = link_delay_re * fractional_delay_re - |
||||
link_delay_im * fractional_delay_im - a_re; |
||||
in_im = link_delay_re * fractional_delay_im + |
||||
link_delay_im * fractional_delay_re - a_im; |
||||
ap_delay[m][n+5][0] = apd_re + ag[m] * in_re; |
||||
ap_delay[m][n+5][1] = apd_im + ag[m] * in_im; |
||||
} |
||||
out[n][0] = transient_gain[n] * in_re; |
||||
out[n][1] = transient_gain[n] * in_im; |
||||
} |
||||
} |
||||
|
||||
/**
 * Mix two complex channels in place with real coefficients that are
 * stepped once per sample.
 *
 * Only row 0 of h and h_step is read. The step is added before the
 * coefficients are used, so sample 0 is mixed with h[0][k] + h_step[0][k].
 * The updated coefficients are kept in locals; h itself is not modified.
 *
 * @param l       first channel, len {re, im} pairs, overwritten
 * @param r       second channel, len {re, im} pairs, overwritten
 * @param h       starting coefficients (row 0 used)
 * @param h_step  per-sample coefficient increments (row 0 used)
 * @param len     number of samples to process
 */
static void ps_stereo_interpolate_c(float (*l)[2], float (*r)[2],
                                    float h[2][4], float h_step[2][4],
                                    int len)
{
    float h0  = h[0][0];
    float h1  = h[0][1];
    float h2  = h[0][2];
    float h3  = h[0][3];
    float hs0 = h_step[0][0];
    float hs1 = h_step[0][1];
    float hs2 = h_step[0][2];
    float hs3 = h_step[0][3];
    int n;

    for (n = 0; n < len; n++) {
        /* l is s, r is d */
        float l_re = l[n][0];
        float l_im = l[n][1];
        float r_re = r[n][0];
        float r_im = r[n][1];

        h0 += hs0;
        h1 += hs1;
        h2 += hs2;
        h3 += hs3;

        l[n][0] = h0 * l_re + h2 * r_re;
        l[n][1] = h0 * l_im + h2 * r_im;
        r[n][0] = h1 * l_re + h3 * r_re;
        r[n][1] = h1 * l_im + h3 * r_im;
    }
}
||||
|
||||
/**
 * Mix two complex channels in place with complex coefficients that are
 * stepped once per sample.
 *
 * Row 0 of h / h_step supplies the real parts of the four coefficients and
 * row 1 the imaginary parts. The step is added before the coefficients are
 * used. The updated coefficients are kept in locals; h itself is not
 * modified.
 *
 * @param l       first channel, len {re, im} pairs, overwritten
 * @param r       second channel, len {re, im} pairs, overwritten
 * @param h       starting coefficients
 * @param h_step  per-sample coefficient increments
 * @param len     number of samples to process
 */
static void ps_stereo_interpolate_ipdopd_c(float (*l)[2], float (*r)[2],
                                           float h[2][4], float h_step[2][4],
                                           int len)
{
    float h00  = h[0][0],      h10  = h[1][0];
    float h01  = h[0][1],      h11  = h[1][1];
    float h02  = h[0][2],      h12  = h[1][2];
    float h03  = h[0][3],      h13  = h[1][3];
    float hs00 = h_step[0][0], hs10 = h_step[1][0];
    float hs01 = h_step[0][1], hs11 = h_step[1][1];
    float hs02 = h_step[0][2], hs12 = h_step[1][2];
    float hs03 = h_step[0][3], hs13 = h_step[1][3];
    int n;

    for (n = 0; n < len; n++) {
        /* l is s, r is d */
        float l_re = l[n][0];
        float l_im = l[n][1];
        float r_re = r[n][0];
        float r_im = r[n][1];

        h00 += hs00;
        h01 += hs01;
        h02 += hs02;
        h03 += hs03;
        h10 += hs10;
        h11 += hs11;
        h12 += hs12;
        h13 += hs13;

        l[n][0] = h00 * l_re + h02 * r_re - h10 * l_im - h12 * r_im;
        l[n][1] = h00 * l_im + h02 * r_im + h10 * l_re + h12 * r_re;
        r[n][0] = h01 * l_re + h03 * r_re - h11 * l_im - h13 * r_im;
        r[n][1] = h01 * l_im + h03 * r_im + h11 * l_re + h13 * r_re;
    }
}
||||
|
||||
/**
 * Fill a PSDSPContext with the C implementations from this file, then call
 * ff_psdsp_init_arm() when ARCH_ARM is non-zero.
 *
 * NOTE(review): ARCH_ARM presumably comes from config.h as a 0/1 constant,
 * which is why it is tested with a plain if rather than #if -- confirm.
 *
 * @param s  context whose function pointers are all assigned here
 */
av_cold void ff_psdsp_init(PSDSPContext *s)
{
    s->add_squares            = ps_add_squares_c;
    s->mul_pair_single        = ps_mul_pair_single_c;
    s->hybrid_analysis        = ps_hybrid_analysis_c;
    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c;
    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c;
    s->decorrelate            = ps_decorrelate_c;
    s->stereo_interpolate[0]  = ps_stereo_interpolate_c;
    s->stereo_interpolate[1]  = ps_stereo_interpolate_ipdopd_c;

    if (ARCH_ARM)
        ff_psdsp_init_arm(s);
}
@ -0,0 +1,53 @@ |
||||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef LIBAVCODEC_AACPSDSP_H
#define LIBAVCODEC_AACPSDSP_H

/* Dimensions used in the ap_delay parameter type of decorrelate(). */
#define PS_QMF_TIME_SLOTS 32
#define PS_AP_LINKS 3
#define PS_MAX_AP_DELAY 5

/**
 * Table of DSP routines, filled in by ff_psdsp_init().
 * Complex data is passed as arrays of float[2] pairs.
 */
typedef struct PSDSPContext {
    /** Takes a float accumulator array, n complex samples and the count n. */
    void (*add_squares)(float *dst, const float (*src)[2], int n);
    /** Takes complex dst and src0 arrays, a real src1 array and the count n. */
    void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1,
                            int n);
    /** Takes complex out/in arrays, sets of 8 complex coefficients, an
     *  output stride and the count n. */
    void (*hybrid_analysis)(float (*out)[2], float (*in)[2],
                            const float (*filter)[8][2],
                            int stride, int n);
    /** Takes an interleaved [..][32][2] destination, a planar [2][38][64]
     *  source, a start index i and a length. */
    void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64],
                                   int i, int len);
    /** Takes a planar [2][38][64] destination, an interleaved [..][32][2]
     *  source, a start index i and a length. */
    void (*hybrid_synthesis_deint)(float out[2][38][64], float (*in)[32][2],
                                   int i, int len);
    /** Takes complex out/delay arrays, the ap_delay state, two sets of
     *  complex factors, a gain array, a decay slope and a length. */
    void (*decorrelate)(float (*out)[2], float (*delay)[2],
                        float (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
                        const float phi_fract[2], float (*Q_fract)[2],
                        const float *transient_gain,
                        float g_decay_slope,
                        int len);
    /** Two routines sharing one signature: complex l and r arrays,
     *  coefficient tables h and h_step, and a length. */
    void (*stereo_interpolate[2])(float (*l)[2], float (*r)[2],
                                  float h[2][4], float h_step[2][4],
                                  int len);
} PSDSPContext;

/** Initialise every member of s. */
void ff_psdsp_init(PSDSPContext *s);
/** ARM-specific initialiser for s. */
void ff_psdsp_init_arm(PSDSPContext *s);

#endif /* LIBAVCODEC_AACPSDSP_H */
@ -0,0 +1,57 @@ |
||||
/*
|
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/aacpsdsp.h" |
||||
|
||||
void ff_ps_add_squares_neon(float *dst, const float (*src)[2], int n); |
||||
void ff_ps_mul_pair_single_neon(float (*dst)[2], float (*src0)[2], |
||||
float *src1, int n); |
||||
void ff_ps_hybrid_analysis_neon(float (*out)[2], float (*in)[2], |
||||
const float (*filter)[8][2], |
||||
int stride, int n); |
||||
void ff_ps_hybrid_analysis_ileave_neon(float (*out)[32][2], float L[2][38][64], |
||||
int i, int len); |
||||
void ff_ps_hybrid_synthesis_deint_neon(float out[2][38][64], float (*in)[32][2], |
||||
int i, int len); |
||||
void ff_ps_decorrelate_neon(float (*out)[2], float (*delay)[2], |
||||
float (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2], |
||||
const float phi_fract[2], float (*Q_fract)[2], |
||||
const float *transient_gain, float g_decay_slope, |
||||
int len); |
||||
void ff_ps_stereo_interpolate_neon(float (*l)[2], float (*r)[2], |
||||
float h[2][4], float h_step[2][4], |
||||
int len); |
||||
|
||||
/**
 * Replace entries of s with NEON routines when have_neon() reports support
 * for the flags returned by av_get_cpu_flags().
 *
 * Only five members are replaced; hybrid_analysis_ileave, decorrelate and
 * stereo_interpolate[1] are left as the caller set them.
 *
 * @param s  context to update; entries not listed below are not touched
 */
av_cold void ff_psdsp_init_arm(PSDSPContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags)) {
        s->add_squares            = ff_ps_add_squares_neon;
        s->mul_pair_single        = ff_ps_mul_pair_single_neon;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_neon;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_neon;
        s->stereo_interpolate[0]  = ff_ps_stereo_interpolate_neon;
    }
}
@ -0,0 +1,272 @@ |
||||
/* |
||||
* Copyright (c) 2012 Mans Rullgard |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "asm.S" |
||||
|
||||
/*
 * ff_ps_add_squares_neon
 *
 * Assumed register roles (standard ARM argument order -- TODO confirm):
 *   r0 = dst, r1 = src, r2 = n.
 *
 * Each pass squares two q registers loaded from [r1], pairwise-adds them
 * (vpadd) into q3, adds the q register loaded from [r0] and stores one
 * q register through r3, which starts as a copy of r0.
 * The loads for the next pass are issued before the current result is
 * stored; the instructions after the loop finish the last pass.
 * Every load and store carries a :128 alignment qualifier.
 *
 * NOTE(review): the body at label 1 always runs once and the tail stores
 * once more, so this appears to assume n is a multiple of 4 and at
 * least 8 -- confirm against callers.
 */
function ff_ps_add_squares_neon, export=1 |
||||
mov r3, r0 |
||||
sub r2, r2, #4 |
||||
vld1.32 {q0}, [r1,:128]! |
||||
vmul.f32 q0, q0, q0 |
||||
vld1.32 {q2}, [r1,:128]! |
||||
vmul.f32 q2, q2, q2 |
||||
vld1.32 {q1}, [r0,:128]! |
||||
1: |
||||
vpadd.f32 d6, d0, d1 |
||||
vld1.32 {q0}, [r1,:128]! |
||||
vpadd.f32 d7, d4, d5 |
||||
vmul.f32 q0, q0, q0 |
||||
vld1.32 {q2}, [r1,:128]! |
||||
vadd.f32 q3, q1, q3 |
||||
vld1.32 {q1}, [r0,:128]! |
||||
vmul.f32 q2, q2, q2 |
||||
vst1.32 {q3}, [r3,:128]! |
||||
subs r2, r2, #4 |
||||
bgt 1b |
||||
vpadd.f32 d6, d0, d1 |
||||
vpadd.f32 d7, d4, d5 |
||||
vadd.f32 q1, q1, q3 |
||||
vst1.32 {q1}, [r3,:128]! |
||||
bx lr |
||||
endfunc |
||||
|
||||
/*
 * ff_ps_mul_pair_single_neon
 *
 * Assumed register roles (standard ARM argument order -- TODO confirm):
 *   r0 = dst, r1 = src0, r2 = src1, r3 = n.
 *
 * Each pass loads one q register from [r2], multiplies four d registers
 * that came from [r1] by one lane of it each, and stores q2,q3 to [r0].
 * r3 is decremented by 4 per pass.
 *
 * "tst r1, #8" selects one of two variants:
 *   - bit clear: every load from [r1] uses a :128 qualifier;
 *   - bit set (label 2): a single d register is loaded with a :64
 *     qualifier first, the remaining loads use :128, and d1 is carried
 *     over into d0 for the next pass.
 * In both variants the final pass is peeled out after the loop.
 */
function ff_ps_mul_pair_single_neon, export=1 |
||||
sub r3, r3, #4 |
||||
tst r1, #8 |
||||
bne 2f |
||||
vld1.32 {q0}, [r1,:128]! |
||||
1: |
||||
vld1.32 {q3}, [r2,:128]! |
||||
vmul.f32 d4, d0, d6[0] |
||||
vmul.f32 d5, d1, d6[1] |
||||
vld1.32 {q1}, [r1,:128]! |
||||
vmul.f32 d6, d2, d7[0] |
||||
vmul.f32 d7, d3, d7[1] |
||||
vld1.32 {q0}, [r1,:128]! |
||||
vst1.32 {q2,q3}, [r0,:128]! |
||||
subs r3, r3, #4 |
||||
bgt 1b |
||||
vld1.32 {q3}, [r2,:128]! |
||||
vmul.f32 d4, d0, d6[0] |
||||
vmul.f32 d5, d1, d6[1] |
||||
vld1.32 {q1}, [r1,:128]! |
||||
vmul.f32 d6, d2, d7[0] |
||||
vmul.f32 d7, d3, d7[1] |
||||
vst1.32 {q2,q3}, [r0,:128]! |
||||
bx lr |
||||
2: |
||||
vld1.32 {d0}, [r1,:64]! |
||||
vld1.32 {d1,d2}, [r1,:128]! |
||||
1: |
||||
vld1.32 {q3}, [r2,:128]! |
||||
vmul.f32 d4, d0, d6[0] |
||||
vmul.f32 d5, d1, d6[1] |
||||
vld1.32 {d0,d1}, [r1,:128]! |
||||
vmul.f32 d6, d2, d7[0] |
||||
vmul.f32 d7, d0, d7[1] |
||||
vmov d0, d1 |
||||
vld1.32 {d1,d2}, [r1,:128]! |
||||
vst1.32 {q2,q3}, [r0,:128]! |
||||
subs r3, r3, #4 |
||||
bgt 1b |
||||
vld1.32 {q3}, [r2,:128]! |
||||
vmul.f32 d4, d0, d6[0] |
||||
vmul.f32 d5, d1, d6[1] |
||||
vld1.32 {d0}, [r1,:64]! |
||||
vmul.f32 d6, d2, d7[0] |
||||
vmul.f32 d7, d0, d7[1] |
||||
vst1.32 {q2,q3}, [r0,:128]! |
||||
bx lr |
||||
endfunc |
||||
|
||||
/*
 * ff_ps_hybrid_synthesis_deint_neon
 *
 * Assumed register roles (standard ARM argument order -- TODO confirm):
 *   r0 = out, r1 = in, r2 = i, r3 = len.
 *
 * Setup: r0 += i * 4 and r1 += i << (5+1+2); r2 becomes 64 - i;
 * r5 = 64*4 is the post-increment applied after every store;
 * r4 = r0 + 38*64*4 is the second store pointer (lr is the first).
 *
 * Three inner loops, each consuming r12 (a copy of len) two at a time:
 *   - first "2:" loop: one source row, single-lane vst1 stores;
 *   - label 6: two source rows (r1, r6), vst2 stores, r0 += 8;
 *   - label 1: four source rows (r1, r6, r7, r8), vst4 stores, r0 += 16.
 * After the first loop r2 is decremented and "tst r2, #2" decides whether
 * label 6 runs once before entering the label 1 loop, which repeats until
 * r2 is no longer positive.
 */
function ff_ps_hybrid_synthesis_deint_neon, export=1 |
||||
push {r4-r8,lr} |
||||
add r0, r0, r2, lsl #2 |
||||
add r1, r1, r2, lsl #5+1+2 |
||||
rsb r2, r2, #64 |
||||
mov r5, #64*4 |
||||
mov lr, r0 |
||||
add r4, r0, #38*64*4 |
||||
mov r12, r3 |
||||
2: |
||||
vld1.32 {d0,d1}, [r1,:128]! |
||||
vst1.32 {d0[0]}, [lr,:32], r5 |
||||
vst1.32 {d0[1]}, [r4,:32], r5 |
||||
vst1.32 {d1[0]}, [lr,:32], r5 |
||||
vst1.32 {d1[1]}, [r4,:32], r5 |
||||
subs r12, r12, #2 |
||||
bgt 2b |
||||
add r0, r0, #4 |
||||
sub r2, r2, #1 |
||||
tst r2, #2 |
||||
bne 6f |
||||
1: |
||||
mov lr, r0 |
||||
add r4, r0, #38*64*4 |
||||
add r6, r1, # 32*2*4 |
||||
add r7, r1, #2*32*2*4 |
||||
add r8, r1, #3*32*2*4 |
||||
mov r12, r3 |
||||
2: |
||||
vld1.32 {d0,d1}, [r1,:128]! |
||||
vld1.32 {d2,d3}, [r6,:128]! |
||||
vld1.32 {d4,d5}, [r7,:128]! |
||||
vld1.32 {d6,d7}, [r8,:128]! |
||||
vst4.32 {d0[0],d2[0],d4[0],d6[0]}, [lr,:128], r5 |
||||
vst4.32 {d0[1],d2[1],d4[1],d6[1]}, [r4,:128], r5 |
||||
vst4.32 {d1[0],d3[0],d5[0],d7[0]}, [lr,:128], r5 |
||||
vst4.32 {d1[1],d3[1],d5[1],d7[1]}, [r4,:128], r5 |
||||
subs r12, r12, #2 |
||||
bgt 2b |
||||
add r0, r0, #16 |
||||
add r1, r1, #3*32*2*4 |
||||
subs r2, r2, #4 |
||||
bgt 1b |
||||
pop {r4-r8,pc} |
||||
6: |
||||
mov lr, r0 |
||||
add r4, r0, #38*64*4 |
||||
add r6, r1, #32*2*4 |
||||
mov r12, r3 |
||||
2: |
||||
vld1.32 {d0,d1}, [r1,:128]! |
||||
vld1.32 {d2,d3}, [r6,:128]! |
||||
vst2.32 {d0[0],d2[0]}, [lr,:64], r5 |
||||
vst2.32 {d0[1],d2[1]}, [r4,:64], r5 |
||||
vst2.32 {d1[0],d3[0]}, [lr,:64], r5 |
||||
vst2.32 {d1[1],d3[1]}, [r4,:64], r5 |
||||
subs r12, r12, #2 |
||||
bgt 2b |
||||
add r0, r0, #8 |
||||
add r1, r1, #32*2*4 |
||||
sub r2, r2, #2 |
||||
b 1b |
||||
endfunc |
||||
|
||||
/*
 * ff_ps_hybrid_analysis_neon
 *
 * Assumed register roles (standard ARM argument order -- TODO confirm):
 *   r0 = out, r1 = in, r2 = filter, r3 = stride; the fifth argument (n)
 *   is read from [sp] into r12.
 *
 * vldm loads 13 d registers (d19-d31) from [r1]. r3 is shifted left by 3
 * and then used as the post-increment of every 64-bit store to [r0].
 *
 * Before the loop, sums and differences of mirrored registers
 * (d19/d31, d20/d30, d21/d29, d22/d28, d23/d27, d24/d26) are formed once;
 * the differences are lane-swapped (vrev64) and sign-flipped in one lane
 * (veor with q3) and everything is transposed with vtrn for the vmla
 * sequence. d25, the middle register, is applied separately through
 * "vmla.f32 d6, d25, d4[0]".
 *
 * Each loop pass consumes coefficient q registers from [r2] and stores one
 * d register; r12 counts the passes and the code after the loop produces
 * the final store.
 */
function ff_ps_hybrid_analysis_neon, export=1 |
||||
vldm r1, {d19-d31} |
||||
ldr r12, [sp] |
||||
lsl r3, r3, #3 |
||||
vadd.f32 d16, d19, d31 |
||||
vadd.f32 d17, d20, d30 |
||||
vsub.f32 d18, d19, d31 |
||||
vsub.f32 d19, d20, d30 |
||||
vsub.f32 d0, d21, d29 |
||||
vsub.f32 d1, d22, d28 |
||||
vadd.f32 d2, d21, d29 |
||||
vadd.f32 d3, d22, d28 |
||||
vadd.f32 d20, d23, d27 |
||||
vadd.f32 d21, d24, d26 |
||||
vsub.f32 d22, d23, d27 |
||||
vsub.f32 d23, d24, d26 |
||||
vmov.i32 d6, #1<<31 |
||||
vmov.i32 d7, #0 |
||||
vmov.f32 q14, #0.0 |
||||
vmov.f32 q15, #0.0 |
||||
vtrn.32 d6, d7 |
||||
vrev64.32 q9, q9 |
||||
vrev64.32 q0, q0 |
||||
vrev64.32 q11, q11 |
||||
veor q9, q9, q3 |
||||
veor q0, q0, q3 |
||||
veor q11, q11, q3 |
||||
vld1.32 {q13}, [r2,:128]! |
||||
vtrn.32 q8, q9 |
||||
vtrn.32 q1, q0 |
||||
vtrn.32 q10, q11 |
||||
sub r12, r12, #1 |
||||
vmla.f32 q14, q8, q13 |
||||
vld1.32 {q2}, [r2,:128]! |
||||
vmla.f32 q15, q9, q13 |
||||
1: |
||||
vmla.f32 q14, q1, q2 |
||||
vld1.32 {q13}, [r2,:128]! |
||||
vmla.f32 q15, q0, q2 |
||||
vmla.f32 q14, q10, q13 |
||||
vld1.32 {q2}, [r2,:128]! |
||||
vmla.f32 q15, q11, q13 |
||||
vld1.32 {q13}, [r2,:128]! |
||||
vadd.f32 d6, d28, d29 |
||||
vadd.f32 d7, d30, d31 |
||||
vmov.f32 q14, #0.0 |
||||
vmov.f32 q15, #0.0 |
||||
vmla.f32 q14, q8, q13 |
||||
vpadd.f32 d6, d6, d7 |
||||
vmla.f32 q15, q9, q13 |
||||
vmla.f32 d6, d25, d4[0] |
||||
vld1.32 {q2}, [r2,:128]! |
||||
vst1.32 {d6}, [r0,:64], r3 |
||||
subs r12, r12, #1 |
||||
bgt 1b |
||||
vmla.f32 q14, q1, q2 |
||||
vld1.32 {q13}, [r2,:128]! |
||||
vmla.f32 q15, q0, q2 |
||||
vmla.f32 q14, q10, q13 |
||||
vld1.32 {q2}, [r2,:128]! |
||||
vmla.f32 q15, q11, q13 |
||||
vadd.f32 d6, d28, d29 |
||||
vadd.f32 d7, d30, d31 |
||||
vpadd.f32 d6, d6, d7 |
||||
vmla.f32 d6, d25, d4[0] |
||||
vst1.32 {d6}, [r0,:64], r3 |
||||
bx lr |
||||
endfunc |
||||
|
||||
/*
 * ff_ps_stereo_interpolate_neon
 *
 * Assumed register roles (standard ARM argument order -- TODO confirm):
 *   r0 = l, r1 = r, r2 = h, r3 = h_step; the fifth argument (len) is read
 *   from [sp] into r12.
 *
 * q0 is loaded from [r2] and q14 from [r3]. q15 = 2 * q14, then
 * q1 = q0 + q14 and q0 = q0 + q15, so q1 and q0 hold the coefficients one
 * and two steps ahead; both are advanced by q15 in every loop pass.
 * r2/r3 are then reused as store pointers (copies of r0/r1) while r0/r1
 * remain the load pointers.
 *
 * The loop at label 1 stores one q register to each of [r2] and [r3] per
 * pass and decrements r12 by 2. Label 2 handles a single remaining element
 * with d-register stores, using q1 only.
 *
 * NOTE(review): q2/q3 for the next pass are loaded from [r0]/[r1] before
 * the counter is tested, and the first load is a full q register even
 * when len is 1 -- check that reading past the last processed element is
 * acceptable for callers.
 */
function ff_ps_stereo_interpolate_neon, export=1 |
||||
vld1.32 {q0}, [r2] |
||||
vld1.32 {q14}, [r3] |
||||
vadd.f32 q15, q14, q14 |
||||
mov r2, r0 |
||||
mov r3, r1 |
||||
ldr r12, [sp] |
||||
vadd.f32 q1, q0, q14 |
||||
vadd.f32 q0, q0, q15 |
||||
vld1.32 {q2}, [r0,:64]! |
||||
vld1.32 {q3}, [r1,:64]! |
||||
subs r12, r12, #1 |
||||
beq 2f |
||||
1: |
||||
vmul.f32 d16, d4, d2[0] |
||||
vmul.f32 d17, d5, d0[0] |
||||
vmul.f32 d18, d4, d2[1] |
||||
vmul.f32 d19, d5, d0[1] |
||||
vmla.f32 d16, d6, d3[0] |
||||
vmla.f32 d17, d7, d1[0] |
||||
vmla.f32 d18, d6, d3[1] |
||||
vmla.f32 d19, d7, d1[1] |
||||
vadd.f32 q1, q1, q15 |
||||
vadd.f32 q0, q0, q15 |
||||
vld1.32 {q2}, [r0,:64]! |
||||
vld1.32 {q3}, [r1,:64]! |
||||
vst1.32 {q8}, [r2,:64]! |
||||
vst1.32 {q9}, [r3,:64]! |
||||
subs r12, r12, #2 |
||||
bgt 1b |
||||
it lt |
||||
bxlt lr |
||||
2: |
||||
vmul.f32 d16, d4, d2[0] |
||||
vmul.f32 d18, d4, d2[1] |
||||
vmla.f32 d16, d6, d3[0] |
||||
vmla.f32 d18, d6, d3[1] |
||||
vst1.32 {d16}, [r2,:64]! |
||||
vst1.32 {d18}, [r3,:64]! |
||||
bx lr |
||||
endfunc |
Loading…
Reference in new issue