|
|
|
@ -609,3 +609,43 @@ function ff_float_to_int16_interleave_neon, export=1 |
|
|
|
|
vcvt.s32.f32 q1, q1, #16 |
|
|
|
|
b 6b |
|
|
|
|
.endfunc |
|
|
|
|
|
|
|
|
|
@ ff_vector_fmul_neon: element-wise single-precision multiply, done in place.
@
@ In:   r0 = pointer to the first operand; it is also the destination (r0 is
@            copied to r3 and every product is stored back through r3)
@       r1 = pointer to the second operand (only read)
@       r2 = number of 32-bit float elements to process
@ Out:  no register result; the buffer r0 pointed to on entry holds the
@       products r0[i] * r1[i]
@ Uses: r0-r3, ip, q0-q3, q8-q11 and the flags; the stack is not touched
@
@ NOTE(review): presumably called from C as (float *dst, const float *src,
@ int len) -- confirm against the declaration.
@
@ Every load and store carries a :128 qualifier, so both pointers must be
@ 16-byte aligned.
@
@ Structure: a prologue multiplies the first 8 elements, loop 1 handles 16
@ elements per pass, block 2 handles one further group of 8, and label 3
@ stores the last 8 products. Stores trail the multiplies by one stage:
@ finished products wait in q8/q9 (or q10/q11) while the next group is
@ loaded and multiplied. The store pointer r3 therefore always stays behind
@ the load pointer r0, so no input is overwritten before it has been read.
@
@ NOTE(review): nothing validates r2. The count arithmetic only works out
@ for r2 = 8, 16, 24, ...; other positive counts process a different number
@ of elements than requested, and a count below 8 leaves ip non-zero and
@ negative, so loop 1 would run far past the buffers.
function ff_vector_fmul_neon, export=1 |
|
|
|
|
@ r3 = store pointer; the output overwrites the first operand
mov r3, r0 |
|
|
|
|
@ Account for the 8 elements handled by the prologue. The flags set here are
@ tested by the "beq 3f" below; the NEON loads and multiplies in between do
@ not change them.
subs r2, r2, #8 |
|
|
|
|
@ q0,q1 = first 8 floats of the first operand
vld1.64 {d0-d3}, [r0,:128]! |
|
|
|
|
@ q2,q3 = first 8 floats of the second operand
vld1.64 {d4-d7}, [r1,:128]! |
|
|
|
|
@ q8,q9 = first 8 products, kept in registers until a later store
vmul.f32 q8, q0, q2 |
|
|
|
|
vmul.f32 q9, q1, q3 |
|
|
|
|
@ count was exactly 8: only the final store at 3 is left
beq 3f |
|
|
|
|
@ ip = remaining count with the low four bits cleared (multiple of 16)
bics ip, r2, #15 |
|
|
|
|
@ ip == 0: skip the main loop and go straight to the 8-element block
beq 2f |
|
|
|
|
@ Main loop: 16 elements per pass. The flags from this subs are consumed by
@ the "bne 1b" at the bottom of the loop.
1: subs ip, ip, #16 |
|
|
|
|
@ first half of the pass: next 8 products go to q10,q11
vld1.64 {d0-d1}, [r0,:128]! |
|
|
|
|
vld1.64 {d4-d5}, [r1,:128]! |
|
|
|
|
vmul.f32 q10, q0, q2 |
|
|
|
|
vld1.64 {d2-d3}, [r0,:128]! |
|
|
|
|
vld1.64 {d6-d7}, [r1,:128]! |
|
|
|
|
vmul.f32 q11, q1, q3 |
|
|
|
|
@ store the 8 products still pending in q8,q9 from the previous stage
vst1.64 {d16-d19},[r3,:128]! |
|
|
|
|
@ second half of the pass: next 8 products go back into q8,q9
vld1.64 {d0-d1}, [r0,:128]! |
|
|
|
|
vld1.64 {d4-d5}, [r1,:128]! |
|
|
|
|
vmul.f32 q8, q0, q2 |
|
|
|
|
vld1.64 {d2-d3}, [r0,:128]! |
|
|
|
|
vld1.64 {d6-d7}, [r1,:128]! |
|
|
|
|
vmul.f32 q9, q1, q3 |
|
|
|
|
@ store the 8 products computed in the first half (q10,q11)
vst1.64 {d20-d23},[r3,:128]! |
|
|
|
|
@ repeat until ip reaches zero
bne 1b |
|
|
|
|
@ r2 = count left over after the groups of 16
ands r2, r2, #15 |
|
|
|
|
@ nothing left over: only the pending q8,q9 store remains
beq 3f |
|
|
|
|
@ Tail: one more group of 8 elements. Each of q8 and q9 is stored just before
@ it is overwritten with a new product.
2: vld1.64 {d0-d1}, [r0,:128]! |
|
|
|
|
vld1.64 {d4-d5}, [r1,:128]! |
|
|
|
|
vst1.64 {d16-d17},[r3,:128]! |
|
|
|
|
vmul.f32 q8, q0, q2 |
|
|
|
|
vld1.64 {d2-d3}, [r0,:128]! |
|
|
|
|
vld1.64 {d6-d7}, [r1,:128]! |
|
|
|
|
vst1.64 {d18-d19},[r3,:128]! |
|
|
|
|
vmul.f32 q9, q1, q3 |
|
|
|
|
@ store the last 8 products (q8,q9)
3: vst1.64 {d16-d19},[r3,:128]! |
|
|
|
|
@ return to the caller
bx lr |
|
|
|
|
.endfunc |
|
|
|
|