@ -339,3 +339,119 @@ function ff_vector_fmul_reverse_vfp, export=1
vpop { d8 - d15 }
bx l r
endfunc
/**
 * ARM VFP implementation of the 'butterflies_float_c' function.
 * Assumes that len is a positive non-zero number.
 */
@ void ff_butterflies_float_vfp(float *restrict v1, float *restrict v2, int len)
function ff_butterflies_float_vfp, export=1
        @ Register roles (AAPCS argument registers aliased for readability):
        @   BASE1 = v1 (read/write pointer), BASE2 = v2 (read/write pointer),
        @   LEN   = element count, OLDFPSCR = saved FPSCR to restore on exit.
        BASE1   .req    a1
        BASE2   .req    a2
        LEN     .req    a3
        OLDFPSCR .req   a4
        vpush   {s16-s31}               @ s16-s31 are callee-saved in AAPCS VFP
        fmrx    OLDFPSCR, FPSCR         @ save FPSCR: we change vector-mode bits below
        tst     LEN, #7
        beq     4f                      @ common case: len is a multiple of 8
        ldr     ip, =0x03000000         @ RunFast mode, scalar mode
        fmxr    FPSCR, ip
        @ Peel off the len%8 tail elements in scalar mode (1, 2 and 4 at a time).
        tst     LEN, #1
        beq     1f
        vldmia  BASE1!, {s0}
        vldmia  BASE2!, {s8}
        vadd.f  s16, s0, s8             @ t = v1[i] + v2[i]
        vsub.f  s24, s0, s8             @ u = v1[i] - v2[i]
        vstr    s16, [BASE1, #0-4*1]    @ store back at the pre-increment address
        vstr    s24, [BASE2, #0-4*1]
1:
        tst     LEN, #2
        beq     2f
        vldmia  BASE1!, {s0-s1}
        vldmia  BASE2!, {s8-s9}
        vadd.f  s16, s0, s8
        vadd.f  s17, s1, s9
        vsub.f  s24, s0, s8
        vsub.f  s25, s1, s9
        vstr    d8, [BASE1, #0-8*1]     @ s16,s17
        vstr    d12, [BASE2, #0-8*1]    @ s24,s25
2:
        tst     LEN, #4
        beq     3f
        vldmia  BASE1!, {s0-s1}
        vldmia  BASE2!, {s8-s9}
        vldmia  BASE1!, {s2-s3}
        vldmia  BASE2!, {s10-s11}
        vadd.f  s16, s0, s8
        vadd.f  s17, s1, s9
        vsub.f  s24, s0, s8
        vsub.f  s25, s1, s9
        vadd.f  s18, s2, s10
        vadd.f  s19, s3, s11
        vsub.f  s26, s2, s10
        vsub.f  s27, s3, s11
        vstr    d8, [BASE1, #0-16*1]    @ s16,s17
        vstr    d12, [BASE2, #0-16*1]   @ s24,s25
        vstr    d9, [BASE1, #8-16*1]    @ s18,s19
        vstr    d13, [BASE2, #8-16*1]   @ s26,s27
3:
        bics    LEN, LEN, #7            @ LEN = remaining multiple-of-8 elements
        beq     7f
4:
        @ Main loop: 8 elements per iteration using VFP short vectors of
        @ length 4 (each vadd.f/vsub.f below operates on 4 registers).
        ldr     ip, =0x03030000         @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, ip
        @ Software-pipelined: prime the first 8 elements' loads and the first
        @ adds/subs before entering the loop, so loads, arithmetic and stores
        @ of successive iterations overlap.
        vldmia  BASE1!, {s0-s1}
        vldmia  BASE2!, {s8-s9}
        vldmia  BASE1!, {s2-s3}
        vldmia  BASE2!, {s10-s11}
        vadd.f  s16, s0, s8             @ vector op: s16-s19 = s0-s3 + s8-s11
        vldmia  BASE1!, {s4-s5}
        vldmia  BASE2!, {s12-s13}
        vldmia  BASE1!, {s6-s7}
        vldmia  BASE2!, {s14-s15}
        vsub.f  s24, s0, s8             @ vector op: s24-s27 = s0-s3 - s8-s11
        vadd.f  s20, s4, s12            @ vector op: s20-s23 = s4-s7 + s12-s15
        subs    LEN, LEN, #8
        beq     6f
5:      vldmia  BASE1!, {s0-s3}         @ load next iteration while storing this one
        vldmia  BASE2!, {s8-s11}
        vsub.f  s28, s4, s12            @ vector op: s28-s31 = s4-s7 - s12-s15
        vstr    d8, [BASE1, #0-16*3]    @ s16,s17
        vstr    d9, [BASE1, #8-16*3]    @ s18,s19
        vstr    d12, [BASE2, #0-16*3]   @ s24,s25
        vstr    d13, [BASE2, #8-16*3]   @ s26,s27
        vadd.f  s16, s0, s8
        vldmia  BASE1!, {s4-s7}
        vldmia  BASE2!, {s12-s15}
        vsub.f  s24, s0, s8
        vstr    d10, [BASE1, #0-16*3]   @ s20,s21
        vstr    d11, [BASE1, #8-16*3]   @ s22,s23
        vstr    d14, [BASE2, #0-16*3]   @ s28,s29
        vstr    d15, [BASE2, #8-16*3]   @ s30,s31
        vadd.f  s20, s4, s12
        subs    LEN, LEN, #8
        bne     5b
6:      @ Drain the pipeline: store the final two groups of 4 results.
        vsub.f  s28, s4, s12
        vstr    d8, [BASE1, #0-16*2]    @ s16,s17
        vstr    d9, [BASE1, #8-16*2]    @ s18,s19
        vstr    d12, [BASE2, #0-16*2]   @ s24,s25
        vstr    d13, [BASE2, #8-16*2]   @ s26,s27
        vstr    d10, [BASE1, #0-16*1]   @ s20,s21
        vstr    d11, [BASE1, #8-16*1]   @ s22,s23
        vstr    d14, [BASE2, #0-16*1]   @ s28,s29
        vstr    d15, [BASE2, #8-16*1]   @ s30,s31
7:
        fmxr    FPSCR, OLDFPSCR         @ restore caller's FP mode
        vpop    {s16-s31}
        bx      lr
        .unreq  BASE1
        .unreq  BASE2
        .unreq  LEN
        .unreq  OLDFPSCR
endfunc