@ -67,6 +67,210 @@ function ff_vector_fmul_vfp, export=1
bx l r
endfunc
/**
 * ARM VFP implementation of 'vector_fmul_window_c' function
 * Assume that len is a positive (non-zero) number
 */
@ void ff_vector_fmul_window_vfp(float *dst, const float *src0,
@ const float *src1, const float *win, int len)
function ff_vector_fmul_window_vfp, export=1
        @ Windowed combination of two vectors, walking inwards from both ends.
        @ One "step" reads
        @     a  = *SRC0_FWD++      b  = *--SRC1_REV
        @     w0 = *WIN_FWD++       w1 = *--WIN_REV
        @ and writes
        @     *DST_FWD++ = w1 * a - w0 * b
        @     *--DST_REV = w0 * a + w1 * b
        @ len (fifth argument, read from the stack) is the number of steps, so
        @ dst and win are traversed over 2 * len floats, src0 and src1 over len.
        @
        @ Register roles (aliases are released again before endfunc):
DST_FWD     .req    a1                      @ dst, ascending
SRC0_FWD    .req    a2                      @ src0, ascending
SRC1_REV    .req    a3                      @ src1, moved to its end, descending
WIN_FWD     .req    a4                      @ win, ascending
COUNT       .req    v1                      @ steps still to do
DST_REV     .req    v2                      @ dst + 2 * len floats, descending
WIN_REV     .req    v3                      @ win + 2 * len floats, descending
SAVED_FPSCR .req    ip                      @ FPSCR value on entry

        push    {v1-v3,lr}
        ldr     COUNT, [sp, #4*4+0]         @ len sits just above the four words pushed
        vpush   {s16-s31}                   @ saved here, restored by the vpop at 7:
        fmrx    SAVED_FPSCR, FPSCR
        add     DST_REV, DST_FWD, COUNT, lsl #3
        add     SRC1_REV, SRC1_REV, COUNT, lsl #2
        add     WIN_REV, WIN_FWD, COUNT, lsl #3

        tst     COUNT, #7
        beq     4f                          @ common case: len is a multiple of 8

        @ len is not a multiple of 8: peel off len & 7 steps first, one bit
        @ of len at a time, with the VFP in scalar mode.
        ldr     lr, =0x03000000             @ RunFast mode, scalar mode
        fmxr    FPSCR, lr

        @ bit 0 of len: one step
        tst     COUNT, #1
        beq     1f
        vldmdb  WIN_REV!, {s0}
        vldmia  SRC0_FWD!, {s8}
        vldmia  WIN_FWD!, {s16}
        vmul.f  s24, s0, s8
        vldmdb  SRC1_REV!, {s20}
        vmul.f  s8, s16, s8
        vmls.f  s24, s16, s20
        vmla.f  s8, s0, s20
        vstmia  DST_FWD!, {s24}
        vstmdb  DST_REV!, {s8}

1:
        @ bit 1 of len: two steps.  Descending data is transferred one
        @ register per instruction so that consecutive registers receive
        @ elements in reverse memory order.
        tst     COUNT, #2
        beq     2f
        vldmdb  WIN_REV!, {s0}
        vldmdb  WIN_REV!, {s1}
        vldmia  SRC0_FWD!, {s8-s9}
        vldmia  WIN_FWD!, {s16-s17}
        vmul.f  s24, s0, s8
        vmul.f  s25, s1, s9
        vldmdb  SRC1_REV!, {s20}
        vldmdb  SRC1_REV!, {s21}
        vmul.f  s8, s16, s8
        vmul.f  s9, s17, s9
        vmls.f  s24, s16, s20
        vmls.f  s25, s17, s21
        vmla.f  s8, s0, s20
        vmla.f  s9, s1, s21
        vstmia  DST_FWD!, {s24-s25}
        vstmdb  DST_REV!, {s8}
        vstmdb  DST_REV!, {s9}

2:
        @ bit 2 of len: four steps
        tst     COUNT, #4
        beq     3f
        vldmdb  WIN_REV!, {s0}
        vldmdb  WIN_REV!, {s1}
        vldmdb  WIN_REV!, {s2}
        vldmdb  WIN_REV!, {s3}
        vldmia  SRC0_FWD!, {s8-s11}
        vldmia  WIN_FWD!, {s16-s19}
        vmul.f  s24, s0, s8
        vmul.f  s25, s1, s9
        vmul.f  s26, s2, s10
        vmul.f  s27, s3, s11
        vldmdb  SRC1_REV!, {s20}
        vldmdb  SRC1_REV!, {s21}
        vldmdb  SRC1_REV!, {s22}
        vldmdb  SRC1_REV!, {s23}
        vmul.f  s8, s16, s8
        vmul.f  s9, s17, s9
        vmul.f  s10, s18, s10
        vmul.f  s11, s19, s11
        vmls.f  s24, s16, s20
        vmls.f  s25, s17, s21
        vmls.f  s26, s18, s22
        vmls.f  s27, s19, s23
        vmla.f  s8, s0, s20
        vmla.f  s9, s1, s21
        vmla.f  s10, s2, s22
        vmla.f  s11, s3, s23
        vstmia  DST_FWD!, {s24-s27}
        vstmdb  DST_REV!, {s8}
        vstmdb  DST_REV!, {s9}
        vstmdb  DST_REV!, {s10}
        vstmdb  DST_REV!, {s11}

3:
        bics    COUNT, COUNT, #7            @ drop the steps handled above
        beq     7f                          @ nothing left: len was below 8

4:
        @ Bulk path: COUNT is now a non-zero multiple of 8.  Steps are done
        @ in groups of four; loads for the next group are interleaved with
        @ the arithmetic and stores of the current one.
        ldr     lr, =0x03030000             @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr

        @ Prologue: start the first group (inputs in s0-s3, s8-s11, s16-s23)
        @ and fetch part of the second (s4-s7, s12-s15).
        vldmdb  WIN_REV!, {s0}
        vldmdb  WIN_REV!, {s1}
        vldmdb  WIN_REV!, {s2}
        vldmdb  WIN_REV!, {s3}
        vldmia  SRC0_FWD!, {s8-s11}
        vldmia  WIN_FWD!, {s16-s19}
        vmul.f  s24, s0, s8                 @ vector * vector
        vldmdb  SRC1_REV!, {s20}
        vldmdb  SRC1_REV!, {s21}
        vldmdb  SRC1_REV!, {s22}
        vldmdb  SRC1_REV!, {s23}
        vmul.f  s8, s16, s8                 @ vector * vector
        vmls.f  s24, s16, s20               @ vector * vector
        vldmdb  WIN_REV!, {s4}
        vldmdb  WIN_REV!, {s5}
        vldmdb  WIN_REV!, {s6}
        vldmdb  WIN_REV!, {s7}
        vldmia  SRC0_FWD!, {s12-s13}
        vmla.f  s8, s0, s20                 @ vector * vector
        vldmia  SRC0_FWD!, {s14-s15}
        subs    COUNT, COUNT, #8            @ prologue + epilogue cover 8 steps
        beq     6f

5:
        @ Steady state, 8 steps per pass.
        @ First half: finish the group held in s4-s7/s12-s15, store the
        @ results waiting in s24-s27/s8-s11, reload s0-s3/s8-s11.
        vldmia  WIN_FWD!, {s20-s23}
        vmul.f  s28, s4, s12                @ vector * vector
        vstmia  DST_FWD!, {s24-s25}
        vldmdb  SRC1_REV!, {s16}
        vldmdb  SRC1_REV!, {s17}
        vldmdb  SRC1_REV!, {s18}
        vldmdb  SRC1_REV!, {s19}
        vmul.f  s12, s20, s12               @ vector * vector
        vstmia  DST_FWD!, {s26-s27}
        vstmdb  DST_REV!, {s8}
        vstmdb  DST_REV!, {s9}
        vstmdb  DST_REV!, {s10}
        vstmdb  DST_REV!, {s11}
        vmls.f  s28, s20, s16               @ vector * vector
        vldmdb  WIN_REV!, {s0}
        vldmdb  WIN_REV!, {s1}
        vldmdb  WIN_REV!, {s2}
        vldmdb  WIN_REV!, {s3}
        vldmia  SRC0_FWD!, {s8-s9}
        vmla.f  s12, s4, s16                @ vector * vector
        vldmia  SRC0_FWD!, {s10-s11}
        subs    COUNT, COUNT, #8            @ flags are consumed by the bne below
        @ Second half: same pattern with the two register sets swapped.
        vldmia  WIN_FWD!, {s16-s19}
        vmul.f  s24, s0, s8                 @ vector * vector
        vstmia  DST_FWD!, {s28-s29}
        vldmdb  SRC1_REV!, {s20}
        vldmdb  SRC1_REV!, {s21}
        vldmdb  SRC1_REV!, {s22}
        vldmdb  SRC1_REV!, {s23}
        vmul.f  s8, s16, s8                 @ vector * vector
        vstmia  DST_FWD!, {s30-s31}
        vstmdb  DST_REV!, {s12}
        vstmdb  DST_REV!, {s13}
        vstmdb  DST_REV!, {s14}
        vstmdb  DST_REV!, {s15}
        vmls.f  s24, s16, s20               @ vector * vector
        vldmdb  WIN_REV!, {s4}
        vldmdb  WIN_REV!, {s5}
        vldmdb  WIN_REV!, {s6}
        vldmdb  WIN_REV!, {s7}
        vldmia  SRC0_FWD!, {s12-s13}
        vmla.f  s8, s0, s20                 @ vector * vector
        vldmia  SRC0_FWD!, {s14-s15}
        bne     5b

6:
        @ Epilogue: finish the last group and flush both pending result sets.
        vldmia  WIN_FWD!, {s20-s23}
        vmul.f  s28, s4, s12                @ vector * vector
        vstmia  DST_FWD!, {s24-s25}
        vldmdb  SRC1_REV!, {s16}
        vldmdb  SRC1_REV!, {s17}
        vldmdb  SRC1_REV!, {s18}
        vldmdb  SRC1_REV!, {s19}
        vmul.f  s12, s20, s12               @ vector * vector
        vstmia  DST_FWD!, {s26-s27}
        vstmdb  DST_REV!, {s8}
        vstmdb  DST_REV!, {s9}
        vstmdb  DST_REV!, {s10}
        vstmdb  DST_REV!, {s11}
        vmls.f  s28, s20, s16               @ vector * vector
        vmla.f  s12, s4, s16                @ vector * vector
        vstmia  DST_FWD!, {s28-s31}
        vstmdb  DST_REV!, {s12}
        vstmdb  DST_REV!, {s13}
        vstmdb  DST_REV!, {s14}
        vstmdb  DST_REV!, {s15}

7:
        @ Common exit: put back the caller's FPSCR and saved registers.
        fmxr    FPSCR, SAVED_FPSCR
        vpop    {s16-s31}
        pop     {v1-v3,pc}

        .unreq  DST_FWD
        .unreq  DST_REV
        .unreq  SRC0_FWD
        .unreq  SRC1_REV
        .unreq  WIN_FWD
        .unreq  WIN_REV
        .unreq  COUNT
        .unreq  SAVED_FPSCR
endfunc
/**
 * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
 * Assume that len is a positive number and is a multiple of 8