@ -66,6 +66,79 @@
umlsl2 \ a3 \ ( ) . 4 s , \ s1 \ ( ) . 8 h , \ k
.endm
// SPAT_CHECK - vector form of the following C, applied to 16 byte lanes:
//
//      int b = m2s1 - m1;
//      int f = p2s1 - p1;
//      int dc = c0s1 - m1;
//      int de = c0s1 - p1;
//      int sp_max = FFMIN(p1 - c0s1, m1 - c0s1);
//      sp_max = FFMIN(sp_max, FFMAX(-b, -f));
//      int sp_min = FFMIN(c0s1 - p1, c0s1 - m1);
//      sp_min = FFMIN(sp_min, FFMAX(b, f));
//      diff = diff == 0 ? 0 : FFMAX3(diff, sp_min, sp_max);
//
// All arguments are vector register names without an arrangement suffix
// (e.g. v0); the macro appends .16b itself.
//
// Every difference is formed with uqsub, i.e. as an unsigned saturating
// subtraction, so a difference that would be negative becomes 0.
//
// In:       diff, m2s1, m1, c0s1, p1, p2s1
// Out:      diff
// Scratch:  t0, t1, t2, t3 (overwritten before any input is last read, so
//           they must not name the same register as an input)
.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3
        // t3 = FFMAX(-b, -f)
        uqsub           \t3\().16b, \p1\().16b, \p2s1\().16b    // -f
        uqsub           \t1\().16b, \m1\().16b, \m2s1\().16b    // -b
        umax            \t3\().16b, \t1\().16b, \t3\().16b

        // t2 = FFMIN(p1 - c0s1, m1 - c0s1)
        uqsub           \t2\().16b, \m1\().16b, \c0s1\().16b
        uqsub           \t0\().16b, \p1\().16b, \c0s1\().16b
        umin            \t2\().16b, \t2\().16b, \t0\().16b

        // t3 = sp_max
        umin            \t3\().16b, \t2\().16b, \t3\().16b

        // t2 = FFMIN(c0s1 - p1, c0s1 - m1)
        uqsub           \t2\().16b, \c0s1\().16b, \m1\().16b
        uqsub           \t0\().16b, \c0s1\().16b, \p1\().16b
        umin            \t2\().16b, \t2\().16b, \t0\().16b

        // t0 = FFMAX(b, f)
        uqsub           \t0\().16b, \p2s1\().16b, \p1\().16b    // f
        uqsub           \t1\().16b, \m2s1\().16b, \m1\().16b    // b
        umax            \t0\().16b, \t1\().16b, \t0\().16b

        // t2 = sp_min
        umin            \t2\().16b, \t0\().16b, \t2\().16b

        // t1 = all-ones in every lane where the incoming diff is 0; taken
        // before diff is widened so those lanes can be forced back to 0.
        cmeq            \t1\().16b, \diff\().16b, #0
        umax            \diff\().16b, \t2\().16b, \diff\().16b
        umax            \diff\().16b, \t3\().16b, \diff\().16b
        bic             \diff\().16b, \diff\().16b, \t1\().16b
.endm
// DIFF_CLIP - clamp s0 into [d0 - diff, d0 + diff], 16 byte lanes at once:
//
//      i0 = s0;
//      if (i0 > d0 + diff0)
//          i0 = d0 + diff0;
//      else if (i0 < d0 - diff0)
//          i0 = d0 - diff0;
//
// All arguments are vector register names without an arrangement suffix.
// The bounds are built with uqadd/uqsub, so they saturate at the limits of
// an unsigned byte instead of wrapping.
//
// In:       s0, d0, diff
// Out:      i0
// Scratch:  t0, t1
//
// Passing the same register for i0 and s0 is safe.
.macro DIFF_CLIP i0, s0, d0, diff, t0, t1
        uqadd           \t0\().16b, \diff\().16b, \d0\().16b    // upper bound
        uqsub           \t1\().16b, \d0\().16b, \diff\().16b    // lower bound
        umin            \i0\().16b, \t0\().16b, \s0\().16b
        umax            \i0\().16b, \t1\().16b, \i0\().16b
.endm
// INTERPOL - per-lane choice between two candidates followed by DIFF_CLIP:
//
//      i0 = FFABS(m1 - p1) > td0 ? i1 : i2;
//      DIFF_CLIP
//
// All arguments are vector register names without an arrangement suffix.
//
// In:       i1, i2, m1, d0, p1, td0, diff
// Out:      i0
// Scratch:  t0 (holds the selected candidate), t1, t2 (DIFF_CLIP scratch)
//
// Passing the same register for i0 and i1 is safe.
.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2
        uabd            \t0\().16b, \p1\().16b, \m1\().16b      // |m1 - p1|
        cmhi            \t0\().16b, \t0\().16b, \td0\().16b     // lane mask: |m1 - p1| > td0
        bsl             \t0\().16b, \i1\().16b, \i2\().16b      // mask ? i1 : i2
        DIFF_CLIP       \i0, \t0, \d0, \diff, \t1, \t2
.endm
// PUSH_VREGS - open a 64-byte stack frame and store d8-d15 in it.
//
// The first store pre-decrements sp by 64; the remaining pairs are written
// at fixed offsets inside the new frame:
//      [sp, #0]  d8,  d9
//      [sp, #16] d10, d11
//      [sp, #32] d12, d13
//      [sp, #48] d14, d15
.macro PUSH_VREGS
        stp             d8,  d9,  [sp, #-64]!
        stp             d14, d15, [sp, #48]
        stp             d12, d13, [sp, #32]
        stp             d10, d11, [sp, #16]
.endm
// POP_VREGS - reload d8-d15 from the 64-byte frame laid out by PUSH_VREGS
// and release it.
//
// The last load post-increments sp by 64, so it has to stay last: the other
// three pairs are addressed relative to the still-open frame.
.macro POP_VREGS
        ldp             d10, d11, [sp, #16]
        ldp             d12, d13, [sp, #32]
        ldp             d14, d15, [sp, #48]
        ldp             d8,  d9,  [sp], #64
.endm
.macro LDR_COEFFS d, t 0
movrel \ t 0 , c o e f f s , 0
ld1 { \ d \ ( ) . 8 h } , [ \ t 0 ]
@ -81,6 +154,110 @@ const coeffs, align=4 // align 4 means align on 2^4 boundary
.hword 5 0 7 7 , 9 8 1 / / sp[ 0 ] = v0 . h [ 6 ]
endconst
// ============================================================================
//
// void ff_bwdif_filter_edge_neon(
//      void *dst1,     // x0
//      void *prev1,    // x1
//      void *cur1,     // x2
//      void *next1,    // x3
//      int w,          // w4
//      int prefs,      // w5
//      int mrefs,      // w6
//      int prefs2,     // w7
//      int mrefs2,     // [sp, #0]
//      int parity,     // [sp, #SP_INT]
//      int clip_max,   // [sp, #SP_INT*2]  unused
//      int spat);      // [sp, #SP_INT*3]
//
// Register roles inside the loop:
//      x0  = dst                   x17 = next2 (prev or next, see below)
//      x1  = prev                  w8  = mrefs2
//      x2  = cur (also prev2)      w16 = spat
//      x3  = next                  w4  = remaining width
//      v16 = d0, v17 = td0, v22 = p1, v24 = m1, v0 = diff, v2 = result
//      v1, v3, v18, v19, v21, v28-v31 = temporaries
//
// Each iteration loads, computes and stores a full 16 bytes, and the loop
// runs while the remaining width is > 0; a width that is not a multiple of
// 16 is therefore rounded up to the next multiple.
// NOTE(review): presumably the caller guarantees the buffers allow this -
// confirm against the C wrapper.
//
// The function makes no calls and does not touch sp. SP_INT is defined
// elsewhere in this file.

function ff_bwdif_filter_edge_neon, export=1
        // Nothing to do when w <= 0
        cmp             w4,  #0
        ble             99f

        // Fetch the stack-passed arguments that are used (clip_max is
        // never read).
        ldr             w16, [sp, #SP_INT*3]            // spat
        ldr             w17, [sp, #SP_INT]              // parity
        ldr             w8,  [sp]                       // mrefs2

// #define prev2 cur
//    const uint8_t * restrict next2 = parity ? prev : next;

        cmp             w17, #0
        csel            x17, x3,  x1,  eq               // x17 = next2

// for (x = 0; x < w; x++) {

10:

//         int m1 = cur[mrefs];
//         int d = (prev2[0] + next2[0]) >> 1;
//         int p1 = cur[prefs];
//         int temporal_diff0 = FFABS(prev2[0] - next2[0]);
//         int temporal_diff1 = (FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
//         int temporal_diff2 = (FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
//         int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2);

        ldr             q21, [x17]                      // next2[0]
        ldr             q31, [x2]                       // prev2[0]
        ldr             q22, [x2,  w5, sxtw]            // p1 = v22
        ldr             q24, [x2,  w6, sxtw]            // m1 = v24
        uabd            v17.16b, v21.16b, v31.16b       // td0 = v17
        uhadd           v16.16b, v21.16b, v31.16b       // d0 = v16

        ldr             q2,  [x1,  w5, sxtw]            // prev[prefs]
        ldr             q0,  [x1,  w6, sxtw]            // prev[mrefs]
        ldr             q3,  [x3,  w5, sxtw]            // next[prefs]
        ldr             q1,  [x3,  w6, sxtw]            // next[mrefs]

        ushr            v29.16b, v17.16b, #1            // td0 >> 1

        // v31 no longer holds prev2[0] from here on; it is reused as scratch.
        uabd            v30.16b, v22.16b, v2.16b        // |prev[prefs] - p1|
        uabd            v31.16b, v24.16b, v0.16b        // |prev[mrefs] - m1|
        uhadd           v0.16b,  v30.16b, v31.16b       // td1 = v0

        uabd            v30.16b, v22.16b, v3.16b        // |next[prefs] - p1|
        uabd            v31.16b, v24.16b, v1.16b        // |next[mrefs] - m1|
        uhadd           v1.16b,  v30.16b, v31.16b       // td2 = v1

        umax            v0.16b,  v1.16b,  v0.16b
        umax            v0.16b,  v29.16b, v0.16b        // diff = v0

// if (spat) {
//    SPAT_CHECK()
// }
// i0 = (m1 + p1) >> 1;

        cbz             w16, 1f

        ldr             q18, [x17, w8, sxtw]            // next2[mrefs2]
        ldr             q31, [x2,  w8, sxtw]            // prev2[mrefs2]
        ldr             q19, [x17, w7, sxtw]            // next2[prefs2]
        ldr             q30, [x2,  w7, sxtw]            // prev2[prefs2]
        uhadd           v19.16b, v30.16b, v19.16b       // p2s1 = v19
        uhadd           v18.16b, v31.16b, v18.16b       // m2s1 = v18

        // diff = v0, m2s1 = v18, m1 = v24, c0s1 = v16, p1 = v22, p2s1 = v19
        // t0-t3 = v31, v30, v29, v28
        SPAT_CHECK      v0,  v18, v24, v16, v22, v19, v31, v30, v29, v28

1:
        uhadd           v2.16b,  v24.16b, v22.16b       // (m1 + p1) >> 1

        // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30
        DIFF_CLIP       v2,  v2,  v16, v0,  v31, v30

// dst[0] = av_clip(interpol, 0, clip_max);
        // No explicit clip against clip_max is performed here; the byte
        // result is stored as is.
        str             q2,  [x0], #16

// dst++;
// cur++;
// }

        // The adds do not modify the flags; bgt tests the result of subs.
        add             x17, x17, #16
        add             x3,  x3,  #16
        add             x2,  x2,  #16
        add             x1,  x1,  #16
        subs            w4,  w4,  #16
        bgt             10b

99:
        ret
endfunc
/ / = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
/ /
/ / void f f _ b w d i f _ f i l t e r _ i n t r a _ n e o n (