@ -49,24 +49,6 @@
.endif
.endm
/*
 * Pointer-width helpers.
 *
 *   PTR        data directive that emits one pointer-sized value
 *   LOAD_PTR   ra = pointer slot number \offset in the array at \rbase
 *   STORE_PTR  pointer slot number \offset in the array at \rbase = ra
 *
 * \offset counts slots, not bytes: it is multiplied by 8 when ARCH_PPC64
 * is non-zero and by 4 otherwise.
 */
#if ARCH_PPC64

#define PTR .quad

.macro LOAD_PTR ra, rbase, offset
    ld      \ra,(\offset)*8(\rbase)
.endm

.macro STORE_PTR ra, rbase, offset
    std     \ra,(\offset)*8(\rbase)
.endm

#else

#define PTR .int

.macro LOAD_PTR ra, rbase, offset
    lwz     \ra,(\offset)*4(\rbase)
.endm

.macro STORE_PTR ra, rbase, offset
    stw     \ra,(\offset)*4(\rbase)
.endm

#endif
.macro FFT4 a0 , a1 , a2 , a3 / / i n : 0 - 1 o u t : 2 - 3
vperm \ a2 ,\ a0 ,\ a1 ,v20 / / v c p r m ( 0 ,1 ,s2 ,s1 ) / / { r0 ,i 0 ,r3 ,i 2 }
vperm \ a3 ,\ a0 ,\ a1 ,v21 / / v c p r m ( 2 ,3 ,s0 ,s3 ) / / { r1 ,i 1 ,r2 ,i 3 }
@ -314,18 +296,105 @@ fft_pass\suffix\()_altivec:
blr
.endm
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */

/*
 * Byte-index groups used to build 16-byte permute control vectors.
 * Each WORD_x expands to the four consecutive byte indices of one 32-bit
 * word: WORD_0..WORD_3 cover bytes 0x00-0x0f and WORD_s0..WORD_s3 cover
 * bytes 0x10-0x1f. With vperm, indices 0x00-0x0f pick bytes from the first
 * source operand and 0x10-0x1f from the second.
 */
#define WORD_0  0x00,0x01,0x02,0x03
#define WORD_1  0x04,0x05,0x06,0x07
#define WORD_2  0x08,0x09,0x0a,0x0b
#define WORD_3  0x0c,0x0d,0x0e,0x0f
#define WORD_s0 0x10,0x11,0x12,0x13
#define WORD_s1 0x14,0x15,0x16,0x17
#define WORD_s2 0x18,0x19,0x1a,0x1b
#define WORD_s3 0x1c,0x1d,0x1e,0x1f

/*
 * vcprm(a, b, c, d) emits one 16-byte control vector as .byte data; each
 * argument names the word (0-3 or s0-s3) placed in that result position.
 */
#define vcprm(a, b, c, d) .byte WORD_##a, WORD_##b, WORD_##c, WORD_##d
.rodata
.align 4
/*
 * Constant table of sixteen 16-byte vectors: five rows of four floats
 * followed by eleven vcprm control vectors. fft_calc reads the table
 * front to back with two lvm calls, so the row order fixes which vector
 * register each constant ends up in (noted on each row).
 */
fft_data:
    .float  0, 0, 0, 0                                      // v14
    .float  1, 0.92387953, M_SQRT1_2, 0.38268343            // v15
    .float  0, 0.38268343, M_SQRT1_2, 0.92387953            // v16
    .float  -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2,-M_SQRT1_2     // v17
    .float  M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2      // v18
    vcprm(s0,3,2,1)                                         // v19
    vcprm(0,1,s2,s1)                                        // v20
    vcprm(2,3,s0,s3)                                        // v21
    vcprm(2,s3,3,s2)                                        // v22
    vcprm(0,1,s0,s1)                                        // v23
    vcprm(2,3,s2,s3)                                        // v24
    vcprm(2,3,0,1)                                          // v25
    vcprm(1,2,s3,s0)                                        // v26
    vcprm(0,3,s2,s1)                                        // v27
    vcprm(0,2,s1,s3)                                        // v28
    vcprm(1,3,s0,s2)                                        // v29
/*
 * lvm b, r [, more registers]
 *
 * Load a list of vector registers from consecutive 16-byte slots.
 *   b     general register holding the address of the first slot; it is
 *         advanced by 16 for every register loaded, so on exit it points
 *         just past the last slot read
 *   r     first vector register to load
 *   regs  optional further vector registers, handled by recursion
 */
.macro lvm b, r, regs:vararg
    lvx     \r, 0, \b               // index operand 0: address is \b alone
    addi    \b, \b, 16              // step to the next 16-byte slot
  .ifnb \regs
    lvm     \b, \regs               // recurse on the remaining registers
  .endif
.endm
/*
 * stvm b, r [, more registers]
 *
 * Store a list of vector registers to consecutive 16-byte slots.
 *   b     general register holding the address of the first slot; it is
 *         advanced by 16 for every register stored, so on exit it points
 *         just past the last slot written
 *   r     first vector register to store
 *   regs  optional further vector registers, handled by recursion
 */
.macro stvm b, r, regs:vararg
    stvx    \r, 0, \b               // index operand 0: address is \b alone
    addi    \b, \b, 16              // step to the next 16-byte slot
  .ifnb \regs
    stvm    \b, \regs               // recurse on the remaining registers
  .endif
.endm
/*
 * fft_calc interleave
 *
 * Emits the exported entry point ff_fft_calc<interleave>_altivec.
 *
 * In:   r3 = pointer whose first 32-bit word is the dispatch selector.
 *            The word minus 2 indexes fft_dispatch_tab, whose first
 *            entries are fft4, fft8, fft16, so the word is presumably
 *            log2 of the transform size - TODO confirm against caller.
 *       r4 = value handed to the selected routine in r3 (presumably the
 *            data buffer - TODO confirm).
 * Work: saves LR, v20-v29 and VRSAVE in a new stack frame, loads the
 *       sixteen constants of fft_data into v14-v29, sets r9 = 16 and
 *       r12 = address of ff_cos_tabs, then calls the selected routine
 *       through CTR.
 * Out:  v20-v29, VRSAVE, r1 and LR are restored before returning.
 *
 * stp/stpu/lp/lpx, PS, movrel, extfunc and X() are defined outside this
 * excerpt; they are assumed to be the pointer-sized store/load helpers,
 * the pointer size in bytes and the symbol helpers - verify in asm.S.
 */
.macro fft_calc interleave
extfunc ff_fft_calc\interleave\()_altivec
    mflr    r0
    stp     r0, 2*PS(r1)                // LR into the caller frame
    stpu    r1, -(160+16*PS)(r1)        // new frame: 10 vectors * 16 bytes + 16 pointer slots
    addi    r6, r1, 16*PS               // vector save area starts above the 16 pointer slots
    stvm    r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
    mfvrsave r0
    stw     r0, 15*PS(r1)               // old VRSAVE goes in the last pointer slot
    // VRSAVE = 0xfffffffc (every vector register except v30/v31 flagged).
    // li takes a signed 16-bit immediate, so the hex spelling is out of
    // range for assemblers that evaluate operands as 64-bit values; -4
    // sign-extends to the same low 32 bits and has the same encoding.
    // NOTE(review): the hex form is kept under __APPLE__ on the assumption
    // that Apple's assembler is what accepted it - confirm whether -4 can
    // be used unconditionally.
#ifdef __APPLE__
    li      r6, 0xfffffffc
#else
    li      r6, -4
#endif
    mtvrsave r6

    movrel  r6, fft_data
    lvm     r6, v14, v15, v16, v17, v18, v19, v20, v21
    lvm     r6, v22, v23, v24, v25, v26, v27, v28, v29   // r6 was advanced by the first lvm

    li      r9, 16                      // constant for the called routines (use not visible here)
    movrel  r12, X(ff_cos_tabs)

    movrel  r6, fft_dispatch_tab\interleave\()_altivec
    lwz     r3, 0(r3)                   // selector word
    subi    r3, r3, 2                   // table entry 0 corresponds to selector 2
    slwi    r3, r3, 2+ARCH_PPC64        // scale index by pointer size (4 or 8)
    lpx     r3, r3, r6                  // r3 = dispatch table entry

    mtctr   r3
    mr      r3, r4                      // second argument becomes the callee first argument
    bctrl

    addi    r6, r1, 16*PS
    lvm     r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
    lwz     r6, 15*PS(r1)
    mtvrsave r6
    lp      r1, 0(r1)                   // pop the frame via the back chain
    lp      r0, 2*PS(r1)
    mtlr    r0
    blr
.endm
.macro DECL_FFT suffix, b i t s , n , n 2 , n 4
fft\ n \ s u f f i x \ ( ) _ a l t i v e c :
mflr r0
STORE_ P T R r0 ,r1 ,\ b i t s - 5
stp r0 ,P S * ( \ b i t s - 3 ) ( r1 )
bl f f t \ n 2 \ ( ) _ a l t i v e c
addi2 r3 ,\ n * 4
bl f f t \ n 4 \ ( ) _ a l t i v e c
addi2 r3 ,\ n * 2
bl f f t \ n 4 \ ( ) _ a l t i v e c
addi2 r3 ,\ n * - 6
LOAD_ P T R r0 ,r1 ,\ b i t s - 5
LOAD_ P T R r4 ,r12 ,\ b i t s
lp r0 ,P S * ( \ b i t s - 3 ) ( r1 )
lp r4 ,\ b i t s * P S ( r12 )
mtlr r0
li r5 ,\ n / 1 6
b f f t _ p a s s \ s u f f i x \ ( ) _ a l t i v e c
@ -350,9 +419,11 @@ fft\n\suffix\()_altivec:
DECL_ F F T \ s u f f i x ,1 5 ,3 2 7 6 8 ,1 6 3 8 4 , 8 1 9 2
DECL_ F F T \ s u f f i x ,1 6 ,6 5 5 3 6 ,3 2 7 6 8 ,1 6 3 8 4
fft_ c a l c \ s u f f i x
.rodata
.global EXTERN_ A S M \ ( ) f f _ f f t _ d i s p a t c h \ s u f f i x \ ( ) _ a l t i v e c
EXTERN_ A S M \ ( ) f f _ f f t _ d i s p a t c h \ s u f f i x \ ( ) _ a l t i v e c :
.align 3
fft_ d i s p a t c h _ t a b \ s u f f i x \ ( ) _ a l t i v e c :
PTR f f t 4 \ s u f f i x \ ( ) _ a l t i v e c
PTR f f t 8 \ s u f f i x \ ( ) _ a l t i v e c
PTR f f t 1 6 \ s u f f i x \ ( ) _ a l t i v e c