@ -41,7 +41,11 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
and w10 , w10 , #0x1F
strh w9 , [ s p , x10 , l s l #1 ]
bne 0 b
add w6 , w6 , #7
bic w6 , w6 , #7
ld1 { v16 . 1 6 b - v19 . 1 6 b } , [ s p ] , #64
sub x2 , x2 , x6
sub x3 , x3 , x6
movi v20 . 8 h , #1
1 : mov w8 , w6 / / b e g i n n i n g o f l i n e
2 : / / Simple l a y o u t f o r a c c e s s i n g 1 6 b i t v a l u e s
@ -52,7 +56,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
// |xDE#xAD|xCA#xFE|xBE#xEF|xFE#xED|....
// +----------------------------------->
//    i-0     i-1     i-2     i-3
ld1 { v2 . 8 b } , [ x1 ] / / d s t [ x ] = a v _ c l i p _ p i x e l ( s r c [ x ] + o f f s e t _ t a b l e [ s r c [ x ] > > s h i f t ] ) ;
ld1 { v2 . 8 b } , [ x1 ] , #8 / / d s t [ x ] = a v _ c l i p _ p i x e l ( s r c [ x ] + o f f s e t _ t a b l e [ s r c [ x ] > > s h i f t ] ) ;
uxtl v0 . 8 h , v2 . 8 b / / l o a d s r c [ x ]
ushr v2 . 8 h , v0 . 8 h , #3 / / > > B I T _ D E P T H - 3
shl v1 . 8 h , v2 . 8 h , #1 / / l o w ( x2 , a c c e s s i n g s h o r t )
@ -61,7 +65,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
tbx v2 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v1 . 1 6 b / / t a b l e
add v1 . 8 h , v0 . 8 h , v2 . 8 h / / s r c [ x ] + t a b l e
sqxtun v4 . 8 b , v1 . 8 h / / c l i p + n a r r o w
st1 { v4 . 8 b } , [ x0 ] / / s t o r e
st1 { v4 . 8 b } , [ x0 ] , #8 / / s t o r e
subs w8 , w8 , #8 / / d o n e 8 p i x e l s
bne 2 b
subs w7 , w7 , #1 / / f i n i s h e d l i n e , p r e p . n e w