@ -710,7 +710,7 @@ function idct16x16_dc_add_neon
ret
endfunc
.macro idct16
function i d c t 1 6
dmbutterfly0 v16 , v24 , v16 , v24 , v4 , v5 , v6 , v7 , v8 , v9 / / v16 = t 0 a , v24 = t 1 a
dmbutterfly v20 , v28 , v0 . s [ 2 ] , v0 . s [ 3 ] , v4 , v5 , v6 , v7 / / v20 = t 2 a , v28 = t 3 a
dmbutterfly v18 , v30 , v1 . s [ 0 ] , v1 . s [ 1 ] , v4 , v5 , v6 , v7 / / v18 = t 4 a , v30 = t 7 a
@ -753,9 +753,10 @@ endfunc
butterfly_ 4 s v19 , v28 , v5 , v28 / / v19 = o u t [ 3 ] , v28 = o u t [ 1 2 ]
butterfly_ 4 s v20 , v27 , v6 , v27 / / v20 = o u t [ 4 ] , v27 = o u t [ 1 1 ]
butterfly_ 4 s v21 , v26 , v26 , v9 / / v21 = o u t [ 5 ] , v26 = o u t [ 1 0 ]
.endm
ret
endfunc
.macro iadst16
function i a d s t 1 6
ld1 { v0 . 8 h ,v1 . 8 h } , [ x11 ]
sxtl v2 . 4 s , v1 . 4 h
sxtl2 v3 . 4 s , v1 . 8 h
@ -830,7 +831,8 @@ endfunc
mov v16 . 1 6 b , v2 . 1 6 b
mov v30 . 1 6 b , v4 . 1 6 b
.endm
ret
endfunc
// Helper macros; we can't use these expressions directly within
// e.g. .irp due to the extra concatenation \(). Therefore wrap
@ -857,12 +859,14 @@ endfunc
// x9 = input stride
.macro itxfm16_1d_funcs txfm
function \ t x f m \ ( ) 1 6 _ 1 d _ 4 x16 _ p a s s1 _ n e o n
mov x14 , x30
movi v4 . 4 s , #0
.irp i, 1 6 , 1 7 , 1 8 , 1 9 , 2 0 , 2 1 , 2 2 , 2 3 , 2 4 , 2 5 , 2 6 , 2 7 , 2 8 , 2 9 , 3 0 , 3 1
load_ c l e a r \ i , x2 , x9
.endr
\ txfm\ ( ) 1 6
bl \ t x f m \ ( ) 1 6
// Do four 4x4 transposes. Originally, v16-v31 contain the
// 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
@ -878,7 +882,7 @@ function \txfm\()16_1d_4x16_pass1_neon
.irp i, 1 6 , 2 0 , 2 4 , 2 8 , 1 7 , 2 1 , 2 5 , 2 9 , 1 8 , 2 2 , 2 6 , 3 0 , 1 9 , 2 3 , 2 7 , 3 1
store \ i , x0 , #16
.endr
ret
br x14
1 :
// Special case: For the last input column (x1 == 12),
// which would be stored as the last row in the temp buffer,
@ -906,7 +910,7 @@ function \txfm\()16_1d_4x16_pass1_neon
mov v29 . 1 6 b , v17 . 1 6 b
mov v30 . 1 6 b , v18 . 1 6 b
mov v31 . 1 6 b , v19 . 1 6 b
ret
br x14
endfunc
// Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
@ -917,6 +921,8 @@ endfunc
// x3 = slice offset
// x9 = temp buffer stride
function \ t x f m \ ( ) 1 6 _ 1 d _ 4 x16 _ p a s s2 _ n e o n
mov x14 , x30
.irp i, 1 6 , 1 7 , 1 8 , 1 9 , 2 0 , 2 1 , 2 2 , 2 3 , 2 4 , 2 5 , 2 6 , 2 7
load \ i , x2 , x9
.endr
@ -928,7 +934,7 @@ function \txfm\()16_1d_4x16_pass2_neon
add x3 , x0 , x1
lsl x1 , x1 , #1
\ txfm\ ( ) 1 6
bl \ t x f m \ ( ) 1 6
dup v8 . 8 h , w13
.macro load_add_store coef0 , c o e f1 , c o e f2 , c o e f3 , c o e f4 , c o e f5 , c o e f6 , c o e f7
@ -983,7 +989,7 @@ function \txfm\()16_1d_4x16_pass2_neon
load_ a d d _ s t o r e v24 . 4 s , v25 . 4 s , v26 . 4 s , v27 . 4 s , v28 . 4 s , v29 . 4 s , v30 . 4 s , v31 . 4 s
.purgem load_add_store
ret
br x14
endfunc
.endm
@ -1158,7 +1164,7 @@ function idct32x32_dc_add_neon
ret
endfunc
.macro idct32_odd
function i d c t 3 2 _ o d d
dmbutterfly v16 , v31 , v10 . s [ 0 ] , v10 . s [ 1 ] , v4 , v5 , v6 , v7 / / v16 = t 1 6 a , v31 = t 3 1 a
dmbutterfly v24 , v23 , v10 . s [ 2 ] , v10 . s [ 3 ] , v4 , v5 , v6 , v7 / / v24 = t 1 7 a , v23 = t 3 0 a
dmbutterfly v20 , v27 , v11 . s [ 0 ] , v11 . s [ 1 ] , v4 , v5 , v6 , v7 / / v20 = t 1 8 a , v27 = t 2 9 a
@ -1209,7 +1215,8 @@ endfunc
dmbutterfly0 v26 , v21 , v26 , v21 , v4 , v5 , v6 , v7 , v8 , v9 / / v26 = t 2 6 a , v21 = t 2 1 a
dmbutterfly0 v25 , v22 , v25 , v22 , v4 , v5 , v6 , v7 , v8 , v9 / / v25 = t 2 5 , v22 = t 2 2
dmbutterfly0 v24 , v23 , v24 , v23 , v4 , v5 , v6 , v7 , v8 , v9 / / v24 = t 2 4 a , v23 = t 2 3 a
.endm
ret
endfunc
// Do a 32-point IDCT of a 4x32 slice out of a 32x32 matrix.
// The 32-point IDCT can be decomposed into two 16-point IDCTs;
@ -1221,6 +1228,8 @@ endfunc
// x2 = src
// x9 = double input stride
function i d c t 3 2 _ 1 d _ 4 x32 _ p a s s1 _ n e o n
mov x14 , x30
movi v4 . 4 s , #0
// v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
@ -1229,7 +1238,7 @@ function idct32_1d_4x32_pass1_neon
st1 { v4 . 4 s } , [ x2 ] , x9
.endr
idc t1 6
bl i d c t 1 6
// Do four 4x4 transposes. Originally, v16-v31 contain the
// 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
@ -1280,7 +1289,7 @@ function idct32_1d_4x32_pass1_neon
st1 { v4 . 4 s } , [ x2 ] , x9
.endr
idc t3 2 _ o d d
bl i d c t 3 2 _ o d d
transpose_ 4 x4 s v31 , v30 , v29 , v28 , v4 , v5 , v6 , v7
transpose_ 4 x4 s v27 , v26 , v25 , v24 , v4 , v5 , v6 , v7
@ -1330,7 +1339,7 @@ function idct32_1d_4x32_pass1_neon
store_ r e v v29 . 4 s , v25 . 4 s , v21 . 4 s , v17 . 4 s , v29 . 1 6 b , v25 . 1 6 b
store_ r e v v28 . 4 s , v24 . 4 s , v20 . 4 s , v16 . 4 s , v28 . 1 6 b , v24 . 1 6 b
.purgem store_rev
ret
br x14
endfunc
// This is mostly the same as 4x32_pass1, but without the transpose,
@ -1342,13 +1351,15 @@ endfunc
// x7 = negative double temp buffer stride
// x9 = double temp buffer stride
function i d c t 3 2 _ 1 d _ 4 x32 _ p a s s2 _ n e o n
mov x14 , x30
// v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
.irp i, 1 6 , 1 7 , 1 8 , 1 9 , 2 0 , 2 1 , 2 2 , 2 3 , 2 4 , 2 5 , 2 6 , 2 7 , 2 8 , 2 9 , 3 0 , 3 1
ld1 { v \ i \ ( ) . 4 s } , [ x2 ] , x9
.endr
sub x2 , x2 , x9 , l s l #4
idc t1 6
bl i d c t 1 6
.irp i, 1 6 , 1 7 , 1 8 , 1 9 , 2 0 , 2 1 , 2 2 , 2 3 , 2 4 , 2 5 , 2 6 , 2 7 , 2 8 , 2 9 , 3 0 , 3 1
st1 { v \ i \ ( ) . 4 s } , [ x2 ] , x9
@ -1364,7 +1375,7 @@ function idct32_1d_4x32_pass2_neon
sub x2 , x2 , x9 , l s l #4
sub x2 , x2 , #128
idc t3 2 _ o d d
bl i d c t 3 2 _ o d d
.macro load_acc_store a, b , c , d , n e g =0
.if \ neg = = 0
@ -1420,7 +1431,7 @@ function idct32_1d_4x32_pass2_neon
load_ a c c _ s t o r e v24 . 4 s , v25 . 4 s , v26 . 4 s , v27 . 4 s , 1
load_ a c c _ s t o r e v28 . 4 s , v29 . 4 s , v30 . 4 s , v31 . 4 s , 1
.purgem load_acc_store
ret
br x14
endfunc
const m i n _ e o b _ i d c t _ i d c t _ 3 2 , a l i g n =4