mirror of https://github.com/FFmpeg/FFmpeg.git
This was originally based on libsbc, and was fully integrated into FFmpeg. (branch: pull/281/head)
parent
f1e490b1ad
commit
f677718bc8
6 changed files with 1070 additions and 0 deletions
@ -0,0 +1,245 @@ |
||||
/* |
||||
* Bluetooth low-complexity, subband codec (SBC) |
||||
* |
||||
* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
|
||||
* Copyright (C) 2008-2010 Nokia Corporation |
||||
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
|
||||
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
|
||||
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
|
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/** |
||||
* @file
|
||||
* SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
|
||||
function ff_sbc_analyze_4_armv6, export=1 |
||||
@ r0 = in, r1 = out, r2 = consts
|
||||
push {r1, r3-r7, lr} |
||||
push {r8-r12, r14} |
||||
ldrd r4, r5, [r0, #0] |
||||
ldrd r6, r7, [r2, #0] |
||||
ldrd r8, r9, [r0, #16] |
||||
ldrd r10, r11, [r2, #16] |
||||
mov r14, #0x8000 |
||||
smlad r3, r4, r6, r14 |
||||
smlad r12, r5, r7, r14 |
||||
ldrd r4, r5, [r0, #32] |
||||
ldrd r6, r7, [r2, #32] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #48] |
||||
ldrd r10, r11, [r2, #48] |
||||
smlad r3, r4, r6, r3 |
||||
smlad r12, r5, r7, r12 |
||||
ldrd r4, r5, [r0, #64] |
||||
ldrd r6, r7, [r2, #64] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #8] |
||||
ldrd r10, r11, [r2, #8] |
||||
smlad r3, r4, r6, r3 @ t1[0] is done
|
||||
smlad r12, r5, r7, r12 @ t1[1] is done
|
||||
ldrd r4, r5, [r0, #24] |
||||
ldrd r6, r7, [r2, #24] |
||||
pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
|
||||
smlad r12, r8, r10, r14 |
||||
smlad r14, r9, r11, r14 |
||||
ldrd r8, r9, [r0, #40] |
||||
ldrd r10, r11, [r2, #40] |
||||
smlad r12, r4, r6, r12 |
||||
smlad r14, r5, r7, r14 |
||||
ldrd r4, r5, [r0, #56] |
||||
ldrd r6, r7, [r2, #56] |
||||
smlad r12, r8, r10, r12 |
||||
smlad r14, r9, r11, r14 |
||||
ldrd r8, r9, [r0, #72] |
||||
ldrd r10, r11, [r2, #72] |
||||
smlad r12, r4, r6, r12 |
||||
smlad r14, r5, r7, r14 |
||||
ldrd r4, r5, [r2, #80] @ start loading cos table
|
||||
smlad r12, r8, r10, r12 @ t1[2] is done
|
||||
smlad r14, r9, r11, r14 @ t1[3] is done
|
||||
ldrd r6, r7, [r2, #88] |
||||
ldrd r8, r9, [r2, #96] |
||||
ldrd r10, r11, [r2, #104] @ cos table fully loaded
|
||||
pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
|
||||
smuad r4, r3, r4 |
||||
smuad r5, r3, r5 |
||||
smlad r4, r12, r8, r4 |
||||
smlad r5, r12, r9, r5 |
||||
smuad r6, r3, r6 |
||||
smuad r7, r3, r7 |
||||
smlad r6, r12, r10, r6 |
||||
smlad r7, r12, r11, r7 |
||||
pop {r8-r12, r14} |
||||
stmia r1, {r4, r5, r6, r7} |
||||
pop {r1, r3-r7, pc} |
||||
endfunc |
||||
|
||||
function ff_sbc_analyze_8_armv6, export=1 |
||||
@ r0 = in, r1 = out, r2 = consts
|
||||
push {r1, r3-r7, lr} |
||||
push {r8-r12, r14} |
||||
ldrd r4, r5, [r0, #24] |
||||
ldrd r6, r7, [r2, #24] |
||||
ldrd r8, r9, [r0, #56] |
||||
ldrd r10, r11, [r2, #56] |
||||
mov r14, #0x8000 |
||||
smlad r3, r4, r6, r14 |
||||
smlad r12, r5, r7, r14 |
||||
ldrd r4, r5, [r0, #88] |
||||
ldrd r6, r7, [r2, #88] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #120] |
||||
ldrd r10, r11, [r2, #120] |
||||
smlad r3, r4, r6, r3 |
||||
smlad r12, r5, r7, r12 |
||||
ldrd r4, r5, [r0, #152] |
||||
ldrd r6, r7, [r2, #152] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #16] |
||||
ldrd r10, r11, [r2, #16] |
||||
smlad r3, r4, r6, r3 @ t1[6] is done
|
||||
smlad r12, r5, r7, r12 @ t1[7] is done
|
||||
ldrd r4, r5, [r0, #48] |
||||
ldrd r6, r7, [r2, #48] |
||||
pkhtb r3, r12, r3, asr #16 @ combine t1[6] and t1[7]
|
||||
str r3, [sp, #-4]! @ save to stack
|
||||
smlad r3, r8, r10, r14 |
||||
smlad r12, r9, r11, r14 |
||||
ldrd r8, r9, [r0, #80] |
||||
ldrd r10, r11, [r2, #80] |
||||
smlad r3, r4, r6, r3 |
||||
smlad r12, r5, r7, r12 |
||||
ldrd r4, r5, [r0, #112] |
||||
ldrd r6, r7, [r2, #112] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #144] |
||||
ldrd r10, r11, [r2, #144] |
||||
smlad r3, r4, r6, r3 |
||||
smlad r12, r5, r7, r12 |
||||
ldrd r4, r5, [r0, #0] |
||||
ldrd r6, r7, [r2, #0] |
||||
smlad r3, r8, r10, r3 @ t1[4] is done
|
||||
smlad r12, r9, r11, r12 @ t1[5] is done
|
||||
ldrd r8, r9, [r0, #32] |
||||
ldrd r10, r11, [r2, #32] |
||||
pkhtb r3, r12, r3, asr #16 @ combine t1[4] and t1[5]
|
||||
str r3, [sp, #-4]! @ save to stack
|
||||
smlad r3, r4, r6, r14 |
||||
smlad r12, r5, r7, r14 |
||||
ldrd r4, r5, [r0, #64] |
||||
ldrd r6, r7, [r2, #64] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #96] |
||||
ldrd r10, r11, [r2, #96] |
||||
smlad r3, r4, r6, r3 |
||||
smlad r12, r5, r7, r12 |
||||
ldrd r4, r5, [r0, #128] |
||||
ldrd r6, r7, [r2, #128] |
||||
smlad r3, r8, r10, r3 |
||||
smlad r12, r9, r11, r12 |
||||
ldrd r8, r9, [r0, #8] |
||||
ldrd r10, r11, [r2, #8] |
||||
smlad r3, r4, r6, r3 @ t1[0] is done
|
||||
smlad r12, r5, r7, r12 @ t1[1] is done
|
||||
ldrd r4, r5, [r0, #40] |
||||
ldrd r6, r7, [r2, #40] |
||||
pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
|
||||
smlad r12, r8, r10, r14 |
||||
smlad r14, r9, r11, r14 |
||||
ldrd r8, r9, [r0, #72] |
||||
ldrd r10, r11, [r2, #72] |
||||
smlad r12, r4, r6, r12 |
||||
smlad r14, r5, r7, r14 |
||||
ldrd r4, r5, [r0, #104] |
||||
ldrd r6, r7, [r2, #104] |
||||
smlad r12, r8, r10, r12 |
||||
smlad r14, r9, r11, r14 |
||||
ldrd r8, r9, [r0, #136] |
||||
ldrd r10, r11, [r2, #136]! |
||||
smlad r12, r4, r6, r12 |
||||
smlad r14, r5, r7, r14 |
||||
ldrd r4, r5, [r2, #(160 - 136 + 0)] |
||||
smlad r12, r8, r10, r12 @ t1[2] is done
|
||||
smlad r14, r9, r11, r14 @ t1[3] is done
|
||||
ldrd r6, r7, [r2, #(160 - 136 + 8)] |
||||
smuad r4, r3, r4 |
||||
smuad r5, r3, r5 |
||||
pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
|
||||
@ r3 = t2[0:1]
|
||||
@ r12 = t2[2:3]
|
||||
pop {r0, r14} @ t2[4:5], t2[6:7]
|
||||
ldrd r8, r9, [r2, #(160 - 136 + 32)] |
||||
smuad r6, r3, r6 |
||||
smuad r7, r3, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 40)] |
||||
smlad r4, r12, r8, r4 |
||||
smlad r5, r12, r9, r5 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 64)] |
||||
smlad r6, r12, r10, r6 |
||||
smlad r7, r12, r11, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 72)] |
||||
smlad r4, r0, r8, r4 |
||||
smlad r5, r0, r9, r5 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 96)] |
||||
smlad r6, r0, r10, r6 |
||||
smlad r7, r0, r11, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 104)] |
||||
smlad r4, r14, r8, r4 |
||||
smlad r5, r14, r9, r5 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)] |
||||
smlad r6, r14, r10, r6 |
||||
smlad r7, r14, r11, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)] |
||||
stmia r1!, {r4, r5} |
||||
smuad r4, r3, r8 |
||||
smuad r5, r3, r9 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)] |
||||
stmia r1!, {r6, r7} |
||||
smuad r6, r3, r10 |
||||
smuad r7, r3, r11 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)] |
||||
smlad r4, r12, r8, r4 |
||||
smlad r5, r12, r9, r5 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)] |
||||
smlad r6, r12, r10, r6 |
||||
smlad r7, r12, r11, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)] |
||||
smlad r4, r0, r8, r4 |
||||
smlad r5, r0, r9, r5 |
||||
ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)] |
||||
smlad r6, r0, r10, r6 |
||||
smlad r7, r0, r11, r7 |
||||
ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)] |
||||
smlad r4, r14, r8, r4 |
||||
smlad r5, r14, r9, r5 |
||||
smlad r6, r14, r10, r6 |
||||
smlad r7, r14, r11, r7 |
||||
pop {r8-r12, r14} |
||||
stmia r1!, {r4, r5, r6, r7} |
||||
pop {r1, r3-r7, pc} |
||||
endfunc |
@ -0,0 +1,105 @@ |
||||
/*
|
||||
* Bluetooth low-complexity, subband codec (SBC) |
||||
* |
||||
* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org> |
||||
* Copyright (C) 2008-2010 Nokia Corporation |
||||
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> |
||||
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> |
||||
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/**
|
||||
* @file |
||||
* SBC ARMv6 optimization for some basic "building bricks" |
||||
*/ |
||||
|
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/sbcdsp.h" |
||||
|
||||
void ff_sbc_analyze_4_armv6(const int16_t *in, int32_t *out, const int16_t *consts); |
||||
void ff_sbc_analyze_8_armv6(const int16_t *in, int32_t *out, const int16_t *consts); |
||||
|
||||
void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts); |
||||
void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts); |
||||
void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8], |
||||
uint32_t scale_factor[2][8], |
||||
int blocks, int channels, int subbands); |
||||
int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8], |
||||
uint32_t scale_factor[2][8], |
||||
int blocks, int subbands); |
||||
int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm, |
||||
int16_t X[2][SBC_X_BUFFER_SIZE], |
||||
int nsamples, int nchannels); |
||||
int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm, |
||||
int16_t X[2][SBC_X_BUFFER_SIZE], |
||||
int nsamples, int nchannels); |
||||
|
||||
DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = { |
||||
8, 4, 2, 1, 128, 64, 32, 16 |
||||
}; |
||||
|
||||
#if HAVE_BIGENDIAN |
||||
#define PERM(a, b, c, d) { \ |
||||
(a * 2) + 1, (a * 2) + 0, \
|
||||
(b * 2) + 1, (b * 2) + 0, \
|
||||
(c * 2) + 1, (c * 2) + 0, \
|
||||
(d * 2) + 1, (d * 2) + 0 \
|
||||
} |
||||
#else |
||||
#define PERM(a, b, c, d) { \ |
||||
(a * 2) + 0, (a * 2) + 1, \
|
||||
(b * 2) + 0, (b * 2) + 1, \
|
||||
(c * 2) + 0, (c * 2) + 1, \
|
||||
(d * 2) + 0, (d * 2) + 1 \
|
||||
} |
||||
#endif |
||||
|
||||
DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = { |
||||
PERM(7, 3, 6, 4), |
||||
PERM(0, 2, 1, 5) |
||||
}; |
||||
|
||||
DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = { |
||||
PERM(15, 7, 14, 8), |
||||
PERM(13, 9, 12, 10), |
||||
PERM(11, 3, 6, 0), |
||||
PERM( 5, 1, 4, 2) |
||||
}; |
||||
|
||||
av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s) |
||||
{ |
||||
int cpu_flags = av_get_cpu_flags(); |
||||
|
||||
if (have_armv6(cpu_flags)) { |
||||
s->sbc_analyze_4 = ff_sbc_analyze_4_armv6; |
||||
s->sbc_analyze_8 = ff_sbc_analyze_8_armv6; |
||||
} |
||||
|
||||
if (have_neon(cpu_flags)) { |
||||
s->sbc_analyze_4 = ff_sbc_analyze_4_neon; |
||||
s->sbc_analyze_8 = ff_sbc_analyze_8_neon; |
||||
s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_neon; |
||||
s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon; |
||||
if (s->increment != 1) { |
||||
s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon; |
||||
s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon; |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,714 @@ |
||||
/* |
||||
* Bluetooth low-complexity, subband codec (SBC) |
||||
* |
||||
* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
|
||||
* Copyright (C) 2008-2010 Nokia Corporation |
||||
* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
|
||||
* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
|
||||
* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
|
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
/** |
||||
* @file
|
||||
* SBC ARM NEON optimizations |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
#include "neon.S" |
||||
|
||||
#define SBC_PROTO_FIXED_SCALE 16 |
||||
|
||||
function ff_sbc_analyze_4_neon, export=1 |
||||
/* TODO: merge even and odd cases (or even merge all four calls to this |
||||
* function) in order to have only aligned reads from 'in' array |
||||
* and reduce number of load instructions */ |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmull.s16 q0, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmull.s16 q1, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
|
||||
vmlal.s16 q0, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmlal.s16 q1, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q0, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmlal.s16 q1, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
|
||||
vmlal.s16 q0, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmlal.s16 q1, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q0, d4, d8 |
||||
vmlal.s16 q1, d5, d9 |
||||
|
||||
vpadd.s32 d0, d0, d1 |
||||
vpadd.s32 d1, d2, d3 |
||||
|
||||
vrshrn.s32 d0, q0, SBC_PROTO_FIXED_SCALE |
||||
|
||||
vld1.16 {d2, d3, d4, d5}, [r2, :128]! |
||||
|
||||
vdup.i32 d1, d0[1] /* TODO: can be eliminated */ |
||||
vdup.i32 d0, d0[0] /* TODO: can be eliminated */ |
||||
|
||||
vmull.s16 q3, d2, d0 |
||||
vmull.s16 q4, d3, d0 |
||||
vmlal.s16 q3, d4, d1 |
||||
vmlal.s16 q4, d5, d1 |
||||
|
||||
vpadd.s32 d0, d6, d7 /* TODO: can be eliminated */ |
||||
vpadd.s32 d1, d8, d9 /* TODO: can be eliminated */ |
||||
|
||||
vst1.32 {d0, d1}, [r1, :128] |
||||
|
||||
bx lr |
||||
endfunc |
||||
|
||||
function ff_sbc_analyze_8_neon, export=1 |
||||
/* TODO: merge even and odd cases (or even merge all four calls to this |
||||
* function) in order to have only aligned reads from 'in' array |
||||
* and reduce number of load instructions */ |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmull.s16 q6, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmull.s16 q7, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
vmull.s16 q8, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmull.s16 q9, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q6, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmlal.s16 q7, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
vmlal.s16 q8, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmlal.s16 q9, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q6, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmlal.s16 q7, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
vmlal.s16 q8, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmlal.s16 q9, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q6, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmlal.s16 q7, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
vmlal.s16 q8, d6, d10 |
||||
vld1.16 {d4, d5}, [r0, :64]! |
||||
vmlal.s16 q9, d7, d11 |
||||
vld1.16 {d8, d9}, [r2, :128]! |
||||
|
||||
vmlal.s16 q6, d4, d8 |
||||
vld1.16 {d6, d7}, [r0, :64]! |
||||
vmlal.s16 q7, d5, d9 |
||||
vld1.16 {d10, d11}, [r2, :128]! |
||||
|
||||
vmlal.s16 q8, d6, d10 |
||||
vmlal.s16 q9, d7, d11 |
||||
|
||||
vpadd.s32 d0, d12, d13 |
||||
vpadd.s32 d1, d14, d15 |
||||
vpadd.s32 d2, d16, d17 |
||||
vpadd.s32 d3, d18, d19 |
||||
|
||||
vrshr.s32 q0, q0, SBC_PROTO_FIXED_SCALE |
||||
vrshr.s32 q1, q1, SBC_PROTO_FIXED_SCALE |
||||
vmovn.s32 d0, q0 |
||||
vmovn.s32 d1, q1 |
||||
|
||||
vdup.i32 d3, d1[1] /* TODO: can be eliminated */ |
||||
vdup.i32 d2, d1[0] /* TODO: can be eliminated */ |
||||
vdup.i32 d1, d0[1] /* TODO: can be eliminated */ |
||||
vdup.i32 d0, d0[0] /* TODO: can be eliminated */ |
||||
|
||||
vld1.16 {d4, d5}, [r2, :128]! |
||||
vmull.s16 q6, d4, d0 |
||||
vld1.16 {d6, d7}, [r2, :128]! |
||||
vmull.s16 q7, d5, d0 |
||||
vmull.s16 q8, d6, d0 |
||||
vmull.s16 q9, d7, d0 |
||||
|
||||
vld1.16 {d4, d5}, [r2, :128]! |
||||
vmlal.s16 q6, d4, d1 |
||||
vld1.16 {d6, d7}, [r2, :128]! |
||||
vmlal.s16 q7, d5, d1 |
||||
vmlal.s16 q8, d6, d1 |
||||
vmlal.s16 q9, d7, d1 |
||||
|
||||
vld1.16 {d4, d5}, [r2, :128]! |
||||
vmlal.s16 q6, d4, d2 |
||||
vld1.16 {d6, d7}, [r2, :128]! |
||||
vmlal.s16 q7, d5, d2 |
||||
vmlal.s16 q8, d6, d2 |
||||
vmlal.s16 q9, d7, d2 |
||||
|
||||
vld1.16 {d4, d5}, [r2, :128]! |
||||
vmlal.s16 q6, d4, d3 |
||||
vld1.16 {d6, d7}, [r2, :128]! |
||||
vmlal.s16 q7, d5, d3 |
||||
vmlal.s16 q8, d6, d3 |
||||
vmlal.s16 q9, d7, d3 |
||||
|
||||
vpadd.s32 d0, d12, d13 /* TODO: can be eliminated */ |
||||
vpadd.s32 d1, d14, d15 /* TODO: can be eliminated */ |
||||
vpadd.s32 d2, d16, d17 /* TODO: can be eliminated */ |
||||
vpadd.s32 d3, d18, d19 /* TODO: can be eliminated */ |
||||
|
||||
vst1.32 {d0, d1, d2, d3}, [r1, :128] |
||||
|
||||
bx lr |
||||
endfunc |
||||
|
||||
function ff_sbc_calc_scalefactors_neon, export=1 |
||||
@ parameters
|
||||
@ r0 = sb_sample_f
|
||||
@ r1 = scale_factor
|
||||
@ r2 = blocks
|
||||
@ r3 = channels
|
||||
@ r4 = subbands
|
||||
@ local variables
|
||||
@ r5 = in_loop_1
|
||||
@ r6 = in
|
||||
@ r7 = out_loop_1
|
||||
@ r8 = out
|
||||
@ r9 = ch
|
||||
@ r10 = sb
|
||||
@ r11 = inc
|
||||
@ r12 = blk
|
||||
|
||||
push {r1-r2, r4-r12} |
||||
ldr r4, [sp, #44] |
||||
mov r11, #64 |
||||
|
||||
mov r9, #0 |
||||
1: |
||||
add r5, r0, r9, lsl#5 |
||||
add r7, r1, r9, lsl#5 |
||||
|
||||
mov r10, #0 |
||||
2: |
||||
add r6, r5, r10, lsl#2 |
||||
add r8, r7, r10, lsl#2 |
||||
mov r12, r2 |
||||
|
||||
vmov.s32 q0, #0 |
||||
vmov.s32 q1, #0x8000 @ 1 << SCALE_OUT_BITS
|
||||
vmov.s32 q14, #1 |
||||
vmov.s32 q15, #16 @ 31 - SCALE_OUT_BITS
|
||||
vadd.s32 q1, q1, q14 |
||||
3: |
||||
vld1.32 {d16, d17}, [r6, :128], r11 |
||||
vabs.s32 q8, q8 |
||||
vld1.32 {d18, d19}, [r6, :128], r11 |
||||
vabs.s32 q9, q9 |
||||
vld1.32 {d20, d21}, [r6, :128], r11 |
||||
vabs.s32 q10, q10 |
||||
vld1.32 {d22, d23}, [r6, :128], r11 |
||||
vabs.s32 q11, q11 |
||||
vmax.s32 q0, q0, q8 |
||||
vmax.s32 q1, q1, q9 |
||||
vmax.s32 q0, q0, q10 |
||||
vmax.s32 q1, q1, q11 |
||||
subs r12, r12, #4 |
||||
bgt 3b |
||||
vmax.s32 q0, q0, q1 |
||||
vsub.s32 q0, q0, q14 |
||||
vclz.s32 q0, q0 |
||||
vsub.s32 q0, q15, q0 |
||||
vst1.32 {d0, d1}, [r8, :128] |
||||
|
||||
add r10, r10, #4 |
||||
cmp r10, r4 |
||||
blt 2b |
||||
|
||||
add r9, r9, #1 |
||||
cmp r9, r3 |
||||
blt 1b |
||||
|
||||
pop {r1-r2, r4-r12} |
||||
bx lr |
||||
endfunc |
||||
|
||||
/* |
||||
* constants: q13 = (31 - SCALE_OUT_BITS) |
||||
* q14 = 1 |
||||
* input: q0 - ((1 << SCALE_OUT_BITS) + 1) |
||||
* r5 - samples for channel 0 |
||||
* r6 - samples for shannel 1 |
||||
* output: q0, q1 - scale factors without joint stereo |
||||
* q2, q3 - scale factors with joint stereo |
||||
* q15 - joint stereo selection mask |
||||
*/ |
||||
.macro calc_scalefactors
|
||||
vmov.s32 q1, q0 |
||||
vmov.s32 q2, q0 |
||||
vmov.s32 q3, q0 |
||||
mov r3, r2 |
||||
1: |
||||
vld1.32 {d18, d19}, [r6, :128], r11 |
||||
vbic.s32 q11, q9, q14 |
||||
vld1.32 {d16, d17}, [r5, :128], r11 |
||||
vhadd.s32 q10, q8, q11 |
||||
vhsub.s32 q11, q8, q11 |
||||
vabs.s32 q8, q8 |
||||
vabs.s32 q9, q9 |
||||
vabs.s32 q10, q10 |
||||
vabs.s32 q11, q11 |
||||
vmax.s32 q0, q0, q8 |
||||
vmax.s32 q1, q1, q9 |
||||
vmax.s32 q2, q2, q10 |
||||
vmax.s32 q3, q3, q11 |
||||
subs r3, r3, #1 |
||||
bgt 1b |
||||
vsub.s32 q0, q0, q14 |
||||
vsub.s32 q1, q1, q14 |
||||
vsub.s32 q2, q2, q14 |
||||
vsub.s32 q3, q3, q14 |
||||
vclz.s32 q0, q0 |
||||
vclz.s32 q1, q1 |
||||
vclz.s32 q2, q2 |
||||
vclz.s32 q3, q3 |
||||
vsub.s32 q0, q13, q0 |
||||
vsub.s32 q1, q13, q1 |
||||
vsub.s32 q2, q13, q2 |
||||
vsub.s32 q3, q13, q3 |
||||
.endm |
||||
|
||||
/* |
||||
* constants: q14 = 1 |
||||
* input: q15 - joint stereo selection mask |
||||
* r5 - value set by calc_scalefactors macro |
||||
* r6 - value set by calc_scalefactors macro |
||||
*/ |
||||
.macro update_joint_stereo_samples
|
||||
sub r8, r6, r11 |
||||
sub r7, r5, r11 |
||||
sub r6, r6, r11, asl #1 |
||||
sub r5, r5, r11, asl #1 |
||||
vld1.32 {d18, d19}, [r6, :128] |
||||
vbic.s32 q11, q9, q14 |
||||
vld1.32 {d16, d17}, [r5, :128] |
||||
vld1.32 {d2, d3}, [r8, :128] |
||||
vbic.s32 q3, q1, q14 |
||||
vld1.32 {d0, d1}, [r7, :128] |
||||
vhsub.s32 q10, q8, q11 |
||||
vhadd.s32 q11, q8, q11 |
||||
vhsub.s32 q2, q0, q3 |
||||
vhadd.s32 q3, q0, q3 |
||||
vbif.s32 q10, q9, q15 |
||||
vbif.s32 d22, d16, d30 |
||||
sub r11, r10, r11, asl #1 |
||||
sub r3, r2, #2 |
||||
2: |
||||
vbif.s32 d23, d17, d31 |
||||
vst1.32 {d20, d21}, [r6, :128], r11 |
||||
vbif.s32 d4, d2, d30 |
||||
vld1.32 {d18, d19}, [r6, :128] |
||||
vbif.s32 d5, d3, d31 |
||||
vst1.32 {d22, d23}, [r5, :128], r11 |
||||
vbif.s32 d6, d0, d30 |
||||
vld1.32 {d16, d17}, [r5, :128] |
||||
vbif.s32 d7, d1, d31 |
||||
vst1.32 {d4, d5}, [r8, :128], r11 |
||||
vbic.s32 q11, q9, q14 |
||||
vld1.32 {d2, d3}, [r8, :128] |
||||
vst1.32 {d6, d7}, [r7, :128], r11 |
||||
vbic.s32 q3, q1, q14 |
||||
vld1.32 {d0, d1}, [r7, :128] |
||||
vhsub.s32 q10, q8, q11 |
||||
vhadd.s32 q11, q8, q11 |
||||
vhsub.s32 q2, q0, q3 |
||||
vhadd.s32 q3, q0, q3 |
||||
vbif.s32 q10, q9, q15 |
||||
vbif.s32 d22, d16, d30 |
||||
subs r3, r3, #2 |
||||
bgt 2b |
||||
sub r11, r10, r11, asr #1 |
||||
vbif.s32 d23, d17, d31 |
||||
vst1.32 {d20, d21}, [r6, :128] |
||||
vbif.s32 q2, q1, q15 |
||||
vst1.32 {d22, d23}, [r5, :128] |
||||
vbif.s32 q3, q0, q15 |
||||
vst1.32 {d4, d5}, [r8, :128] |
||||
vst1.32 {d6, d7}, [r7, :128] |
||||
.endm |
||||
|
||||
function ff_sbc_calc_scalefactors_j_neon, export=1 |
||||
@ parameters
|
||||
@ r0 = in = sb_sample_f
|
||||
@ r1 = out = scale_factor
|
||||
@ r2 = blocks
|
||||
@ r3 = subbands
|
||||
@ local variables
|
||||
@ r4 = consts = ff_sbcdsp_joint_bits_mask
|
||||
@ r5 = in0
|
||||
@ r6 = in1
|
||||
@ r7 = out0
|
||||
@ r8 = out1
|
||||
@ r10 = zero
|
||||
@ r11 = inc
|
||||
@ return r0 = joint
|
||||
|
||||
push {r3-r11} |
||||
movrelx r4, X(ff_sbcdsp_joint_bits_mask) |
||||
mov r10, #0 |
||||
mov r11, #64 |
||||
|
||||
vmov.s32 q14, #1 |
||||
vmov.s32 q13, #16 @ 31 - SCALE_OUT_BITS
|
||||
|
||||
cmp r3, #4 |
||||
bne 8f |
||||
|
||||
4: @ 4 subbands
|
||||
add r5, r0, #0 |
||||
add r6, r0, #32 |
||||
add r7, r1, #0 |
||||
add r8, r1, #32 |
||||
vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
|
||||
vadd.s32 q0, q0, q14 |
||||
|
||||
calc_scalefactors |
||||
|
||||
@ check whether to use joint stereo for subbands 0, 1, 2
|
||||
vadd.s32 q15, q0, q1 |
||||
vadd.s32 q9, q2, q3 |
||||
vmov.s32 d31[1], r10 @ last subband -> no joint
|
||||
vld1.32 {d16, d17}, [r4, :128]! |
||||
vcgt.s32 q15, q15, q9 |
||||
|
||||
@ calculate and save to memory 'joint' variable
|
||||
@ update and save scale factors to memory
|
||||
vand.s32 q8, q8, q15 |
||||
vbit.s32 q0, q2, q15 |
||||
vpadd.s32 d16, d16, d17 |
||||
vbit.s32 q1, q3, q15 |
||||
vpadd.s32 d16, d16, d16 |
||||
vst1.32 {d0, d1}, [r7, :128] |
||||
vst1.32 {d2, d3}, [r8, :128] |
||||
vmov.32 r0, d16[0] |
||||
|
||||
update_joint_stereo_samples |
||||
b 9f |
||||
|
||||
8: @ 8 subbands
|
||||
add r5, r0, #16 |
||||
add r6, r0, #48 |
||||
add r7, r1, #16 |
||||
add r8, r1, #48 |
||||
vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
|
||||
vadd.s32 q0, q0, q14 |
||||
|
||||
calc_scalefactors |
||||
|
||||
@ check whether to use joint stereo for subbands 4, 5, 6
|
||||
vadd.s32 q15, q0, q1 |
||||
vadd.s32 q9, q2, q3 |
||||
vmov.s32 d31[1], r10 @ last subband -> no joint
|
||||
vld1.32 {d16, d17}, [r4, :128]! |
||||
vcgt.s32 q15, q15, q9 |
||||
|
||||
@ calculate part of 'joint' variable and save it to d24
|
||||
@ update and save scale factors to memory
|
||||
vand.s32 q8, q8, q15 |
||||
vbit.s32 q0, q2, q15 |
||||
vpadd.s32 d16, d16, d17 |
||||
vbit.s32 q1, q3, q15 |
||||
vst1.32 {d0, d1}, [r7, :128] |
||||
vst1.32 {d2, d3}, [r8, :128] |
||||
vpadd.s32 d24, d16, d16 |
||||
|
||||
update_joint_stereo_samples |
||||
|
||||
add r5, r0, #0 |
||||
add r6, r0, #32 |
||||
add r7, r1, #0 |
||||
add r8, r1, #32 |
||||
vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
|
||||
vadd.s32 q0, q0, q14 |
||||
|
||||
calc_scalefactors |
||||
|
||||
@ check whether to use joint stereo for subbands 0, 1, 2, 3
|
||||
vadd.s32 q15, q0, q1 |
||||
vadd.s32 q9, q2, q3 |
||||
vld1.32 {d16, d17}, [r4, :128]! |
||||
vcgt.s32 q15, q15, q9 |
||||
|
||||
@ combine last part of 'joint' with d24 and save to memory
|
||||
@ update and save scale factors to memory
|
||||
vand.s32 q8, q8, q15 |
||||
vbit.s32 q0, q2, q15 |
||||
vpadd.s32 d16, d16, d17 |
||||
vbit.s32 q1, q3, q15 |
||||
vpadd.s32 d16, d16, d16 |
||||
vst1.32 {d0, d1}, [r7, :128] |
||||
vadd.s32 d16, d16, d24 |
||||
vst1.32 {d2, d3}, [r8, :128] |
||||
vmov.32 r0, d16[0] |
||||
|
||||
update_joint_stereo_samples |
||||
9: |
||||
pop {r3-r11} |
||||
bx lr |
||||
endfunc |
||||
|
||||
function ff_sbc_enc_process_input_4s_neon, export=1 |
||||
@ parameters
|
||||
@ r0 = positioin
|
||||
@ r1 = pcm
|
||||
@ r2 = X
|
||||
@ r3 = nsamples
|
||||
@ r4 = nchannels
|
||||
@ local variables
|
||||
@ r5 = ff_sbc_input_perm_4
|
||||
@ r6 = src / x
|
||||
@ r7 = dst / y
|
||||
|
||||
push {r1, r3-r7} |
||||
ldr r4, [sp, #24] |
||||
movrelx r5, X(ff_sbc_input_perm_4) |
||||
|
||||
@ handle X buffer wraparound
|
||||
cmp r0, r3 |
||||
bge 1f @ if (position < nsamples)
|
||||
add r7, r2, #576 @ &X[0][SBC_X_BUFFER_SIZE - 40]
|
||||
add r6, r2, r0, lsl#1 @ &X[0][position]
|
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0}, [r6, :64]! |
||||
vst1.16 {d0}, [r7, :64]! |
||||
cmp r4, #1 |
||||
ble 2f @ if (nchannels > 1)
|
||||
add r7, r2, #1232 @ &X[1][SBC_X_BUFFER_SIZE - 40]
|
||||
add r6, r2, #656 |
||||
add r6, r6, r0, lsl#1 @ &X[1][position]
|
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0}, [r6, :64]! |
||||
vst1.16 {d0}, [r7, :64]! |
||||
2: |
||||
mov r0, #288 @ SBC_X_BUFFER_SIZE - 40
|
||||
1: |
||||
|
||||
add r6, r2, r0, lsl#1 @ &X[0][position]
|
||||
add r7, r6, #656 @ &X[1][position]
|
||||
|
||||
cmp r4, #1 |
||||
ble 8f @ if (nchannels > 1)
|
||||
tst r1, #1 |
||||
beq 7f @ if (pcm & 1)
|
||||
@ poor 'pcm' alignment
|
||||
vld1.8 {d0, d1}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #16 |
||||
sub r7, r7, #16 |
||||
sub r0, r0, #8 |
||||
vld1.8 {d4, d5}, [r1]! |
||||
vuzp.16 d4, d5 |
||||
vld1.8 {d20, d21}, [r1]! |
||||
vuzp.16 d20, d21 |
||||
vswp d5, d20 |
||||
vtbl.8 d16, {d4, d5}, d0 |
||||
vtbl.8 d17, {d4, d5}, d1 |
||||
vtbl.8 d18, {d20, d21}, d0 |
||||
vtbl.8 d19, {d20, d21}, d1 |
||||
vst1.16 {d16, d17}, [r6, :128] |
||||
vst1.16 {d18, d19}, [r7, :128] |
||||
subs r3, r3, #8 |
||||
bgt 1b |
||||
b 9f |
||||
7: |
||||
@ proper 'pcm' alignment
|
||||
vld1.8 {d0, d1}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #16 |
||||
sub r7, r7, #16 |
||||
sub r0, r0, #8 |
||||
vld2.16 {d4, d5}, [r1]! |
||||
vld2.16 {d20, d21}, [r1]! |
||||
vswp d5, d20 |
||||
vtbl.8 d16, {d4, d5}, d0 |
||||
vtbl.8 d17, {d4, d5}, d1 |
||||
vtbl.8 d18, {d20, d21}, d0 |
||||
vtbl.8 d19, {d20, d21}, d1 |
||||
vst1.16 {d16, d17}, [r6, :128] |
||||
vst1.16 {d18, d19}, [r7, :128] |
||||
subs r3, r3, #8 |
||||
bgt 1b |
||||
b 9f |
||||
8: |
||||
@ mono
|
||||
vld1.8 {d0, d1}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #16 |
||||
sub r0, r0, #8 |
||||
vld1.8 {d4, d5}, [r1]! |
||||
vtbl.8 d16, {d4, d5}, d0 |
||||
vtbl.8 d17, {d4, d5}, d1 |
||||
vst1.16 {d16, d17}, [r6, :128] |
||||
subs r3, r3, #8 |
||||
bgt 1b |
||||
9: |
||||
pop {r1, r3-r7} |
||||
bx lr |
||||
endfunc |
||||
|
||||
function ff_sbc_enc_process_input_8s_neon, export=1 |
||||
@ parameters
|
||||
@ r0 = positioin
|
||||
@ r1 = pcm
|
||||
@ r2 = X
|
||||
@ r3 = nsamples
|
||||
@ r4 = nchannels
|
||||
@ local variables
|
||||
@ r5 = ff_sbc_input_perm_8
|
||||
@ r6 = src
|
||||
@ r7 = dst
|
||||
|
||||
push {r1, r3-r7} |
||||
ldr r4, [sp, #24] |
||||
movrelx r5, X(ff_sbc_input_perm_8) |
||||
|
||||
@ handle X buffer wraparound
|
||||
cmp r0, r3 |
||||
bge 1f @ if (position < nsamples)
|
||||
add r7, r2, #512 @ &X[0][SBC_X_BUFFER_SIZE - 72]
|
||||
add r6, r2, r0, lsl#1 @ &X[0][position]
|
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1}, [r6, :128]! |
||||
vst1.16 {d0, d1}, [r7, :128]! |
||||
cmp r4, #1 |
||||
ble 2f @ if (nchannels > 1)
|
||||
add r7, r2, #1168 @ &X[1][SBC_X_BUFFER_SIZE - 72]
|
||||
add r6, r2, #656 |
||||
add r6, r6, r0, lsl#1 @ &X[1][position]
|
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1, d2, d3}, [r6, :128]! |
||||
vst1.16 {d0, d1, d2, d3}, [r7, :128]! |
||||
vld1.16 {d0, d1}, [r6, :128]! |
||||
vst1.16 {d0, d1}, [r7, :128]! |
||||
2: |
||||
mov r0, #256 @ SBC_X_BUFFER_SIZE - 72
|
||||
1: |
||||
|
||||
add r6, r2, r0, lsl#1 @ &X[0][position]
|
||||
add r7, r6, #656 @ &X[1][position]
|
||||
|
||||
cmp r4, #1 |
||||
ble 8f @ if (nchannels > 1)
|
||||
tst r1, #1 |
||||
beq 7f @ if (pcm & 1)
|
||||
@ poor 'pcm' alignment
|
||||
vld1.8 {d0, d1, d2, d3}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #32 |
||||
sub r7, r7, #32 |
||||
sub r0, r0, #16 |
||||
vld1.8 {d4, d5, d6, d7}, [r1]! |
||||
vuzp.16 q2, q3 |
||||
vld1.8 {d20, d21, d22, d23}, [r1]! |
||||
vuzp.16 q10, q11 |
||||
vswp q3, q10 |
||||
vtbl.8 d16, {d4, d5, d6, d7}, d0 |
||||
vtbl.8 d17, {d4, d5, d6, d7}, d1 |
||||
vtbl.8 d18, {d4, d5, d6, d7}, d2 |
||||
vtbl.8 d19, {d4, d5, d6, d7}, d3 |
||||
vst1.16 {d16, d17, d18, d19}, [r6, :128] |
||||
vtbl.8 d16, {d20, d21, d22, d23}, d0 |
||||
vtbl.8 d17, {d20, d21, d22, d23}, d1 |
||||
vtbl.8 d18, {d20, d21, d22, d23}, d2 |
||||
vtbl.8 d19, {d20, d21, d22, d23}, d3 |
||||
vst1.16 {d16, d17, d18, d19}, [r7, :128] |
||||
subs r3, r3, #16 |
||||
bgt 1b |
||||
b 9f |
||||
7: |
||||
@ proper 'pcm' alignment
|
||||
vld1.8 {d0, d1, d2, d3}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #32 |
||||
sub r7, r7, #32 |
||||
sub r0, r0, #16 |
||||
vld2.16 {d4, d5, d6, d7}, [r1]! |
||||
vld2.16 {d20, d21, d22, d23}, [r1]! |
||||
vswp q3, q10 |
||||
vtbl.8 d16, {d4, d5, d6, d7}, d0 |
||||
vtbl.8 d17, {d4, d5, d6, d7}, d1 |
||||
vtbl.8 d18, {d4, d5, d6, d7}, d2 |
||||
vtbl.8 d19, {d4, d5, d6, d7}, d3 |
||||
vst1.16 {d16, d17, d18, d19}, [r6, :128] |
||||
vtbl.8 d16, {d20, d21, d22, d23}, d0 |
||||
vtbl.8 d17, {d20, d21, d22, d23}, d1 |
||||
vtbl.8 d18, {d20, d21, d22, d23}, d2 |
||||
vtbl.8 d19, {d20, d21, d22, d23}, d3 |
||||
vst1.16 {d16, d17, d18, d19}, [r7, :128] |
||||
subs r3, r3, #16 |
||||
bgt 1b |
||||
b 9f |
||||
8: |
||||
@ mono
|
||||
vld1.8 {d0, d1, d2, d3}, [r5, :128] |
||||
1: |
||||
sub r6, r6, #32 |
||||
sub r0, r0, #16 |
||||
vld1.8 {d4, d5, d6, d7}, [r1]! |
||||
vtbl.8 d16, {d4, d5, d6, d7}, d0 |
||||
vtbl.8 d17, {d4, d5, d6, d7}, d1 |
||||
vtbl.8 d18, {d4, d5, d6, d7}, d2 |
||||
vtbl.8 d19, {d4, d5, d6, d7}, d3 |
||||
vst1.16 {d16, d17, d18, d19}, [r6, :128] |
||||
subs r3, r3, #16 |
||||
bgt 1b |
||||
9: |
||||
pop {r1, r3-r7} |
||||
bx lr |
||||
endfunc |
Loading…
Reference in new issue