@ -31,10 +31,10 @@ function ff_vp8_idct_add_neon, export=1
movk w4 , #35468 / 2 , l s l 1 6
dup v4 . 2 s , w4
smull v26 . 4 s , v1 . 4 h , v4 . 4 h [ 0 ]
smull v27 . 4 s , v3 . 4 h , v4 . 4 h [ 0 ]
sqdmulh v20 . 4 h , v1 . 4 h , v4 . 4 h [ 1 ]
sqdmulh v23 . 4 h , v3 . 4 h , v4 . 4 h [ 1 ]
smull v26 . 4 s , v1 . 4 h , v4 . h [ 0 ]
smull v27 . 4 s , v3 . 4 h , v4 . h [ 0 ]
sqdmulh v20 . 4 h , v1 . 4 h , v4 . h [ 1 ]
sqdmulh v23 . 4 h , v3 . 4 h , v4 . h [ 1 ]
sqshrn v21 . 4 h , v26 . 4 s , #16
sqshrn v22 . 4 h , v27 . 4 s , #16
add v21 . 4 h , v21 . 4 h , v1 . 4 h
@ -54,12 +54,12 @@ function ff_vp8_idct_add_neon, export=1
transpose_ 4 x4 H v0 , v1 , v2 , v3 , v24 , v5 , v6 , v7
movi v29 . 8 h , #0
smull v26 . 4 s , v1 . 4 h , v4 . 4 h [ 0 ]
smull v26 . 4 s , v1 . 4 h , v4 . h [ 0 ]
st1 { v29 . 8 h } , [ x1 ] , #16
smull v27 . 4 s , v3 . 4 h , v4 . 4 h [ 0 ]
smull v27 . 4 s , v3 . 4 h , v4 . h [ 0 ]
st1 { v29 . 1 6 b } , [ x1 ]
sqdmulh v21 . 4 h , v1 . 4 h , v4 . 4 h [ 1 ]
sqdmulh v23 . 4 h , v3 . 4 h , v4 . 4 h [ 1 ]
sqdmulh v21 . 4 h , v1 . 4 h , v4 . h [ 1 ]
sqdmulh v23 . 4 h , v3 . 4 h , v4 . h [ 1 ]
sqshrn v20 . 4 h , v26 . 4 s , #16
sqshrn v22 . 4 h , v27 . 4 s , #16
add v20 . 4 h , v20 . 4 h , v1 . 4 h
@ -469,7 +469,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
ld1 { v6 . d } [ 1 ] , [ x0 ] , x1
ld1 { v7 . d } [ 1 ] , [ x0 ] , x1
transpose_ 8 x16 b v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
transpose_ 8 x16 B v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
dup v22 . 1 6 b , w2 / / f l i m _ E
.if ! \ simple
@ -480,7 +480,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
sub x0 , x0 , x1 , l s l #4 / / b a c k u p 1 6 r o w s
transpose_ 8 x16 b v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
transpose_ 8 x16 B v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
// Store pixels:
st1 { v0 . d } [ 0 ] , [ x0 ] , x1
@ -531,7 +531,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
ld1 { v7 . d } [ 0 ] , [ x0 ] , x2
ld1 { v7 . d } [ 1 ] , [ x1 ] , x2
transpose_ 8 x16 b v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
transpose_ 8 x16 B v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
dup v22 . 1 6 b , w3 / / f l i m _ E
dup v23 . 1 6 b , w4 / / f l i m _ I
@ -541,7 +541,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub x0 , x0 , x2 , l s l #3 / / b a c k u p u 8 r o w s
sub x1 , x1 , x2 , l s l #3 / / b a c k u p v 8 r o w s
transpose_ 8 x16 b v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
transpose_ 8 x16 B v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 , v30 , v31
// Store pixels:
st1 { v0 . d } [ 0 ] , [ x0 ] , x2 / / l o a d u
@ -613,13 +613,13 @@ endfunc
uxtl v22 . 8 h , v24 . 8 b
ext v26 . 8 b , \ s0 \ ( ) . 8 b , \ s1 \ ( ) . 8 b , #5
uxtl v25 . 8 h , v25 . 8 b
mul v21 . 8 h , v21 . 8 h , v0 . 8 h [ 2 ]
mul v21 . 8 h , v21 . 8 h , v0 . h [ 2 ]
uxtl v26 . 8 h , v26 . 8 b
mul v22 . 8 h , v22 . 8 h , v0 . 8 h [ 3 ]
mls v21 . 8 h , v19 . 8 h , v0 . 8 h [ 1 ]
mls v22 . 8 h , v25 . 8 h , v0 . 8 h [ 4 ]
mla v21 . 8 h , v18 . 8 h , v0 . 8 h [ 0 ]
mla v22 . 8 h , v26 . 8 h , v0 . 8 h [ 5 ]
mul v22 . 8 h , v22 . 8 h , v0 . h [ 3 ]
mls v21 . 8 h , v19 . 8 h , v0 . h [ 1 ]
mls v22 . 8 h , v25 . 8 h , v0 . h [ 4 ]
mla v21 . 8 h , v18 . 8 h , v0 . h [ 0 ]
mla v22 . 8 h , v26 . 8 h , v0 . h [ 5 ]
sqadd v22 . 8 h , v21 . 8 h , v22 . 8 h
sqrshrun \ d \ ( ) . 8 b , v22 . 8 h , #7
.endm
@ -640,20 +640,20 @@ endfunc
uxtl2 v2 . 8 h , v2 . 1 6 b
uxtl v17 . 8 h , v16 . 8 b
uxtl2 v16 . 8 h , v16 . 1 6 b
mul v19 . 8 h , v19 . 8 h , v0 . 8 h [ 3 ]
mul v18 . 8 h , v18 . 8 h , v0 . 8 h [ 2 ]
mul v3 . 8 h , v3 . 8 h , v0 . 8 h [ 2 ]
mul v22 . 8 h , v22 . 8 h , v0 . 8 h [ 3 ]
mls v19 . 8 h , v20 . 8 h , v0 . 8 h [ 4 ]
mul v19 . 8 h , v19 . 8 h , v0 . h [ 3 ]
mul v18 . 8 h , v18 . 8 h , v0 . h [ 2 ]
mul v3 . 8 h , v3 . 8 h , v0 . h [ 2 ]
mul v22 . 8 h , v22 . 8 h , v0 . h [ 3 ]
mls v19 . 8 h , v20 . 8 h , v0 . h [ 4 ]
uxtl v20 . 8 h , \ v0 \ ( ) . 8 b
uxtl2 v1 . 8 h , \ v0 \ ( ) . 1 6 b
mls v18 . 8 h , v17 . 8 h , v0 . 8 h [ 1 ]
mls v3 . 8 h , v16 . 8 h , v0 . 8 h [ 1 ]
mls v22 . 8 h , v23 . 8 h , v0 . 8 h [ 4 ]
mla v18 . 8 h , v20 . 8 h , v0 . 8 h [ 0 ]
mla v19 . 8 h , v21 . 8 h , v0 . 8 h [ 5 ]
mla v3 . 8 h , v1 . 8 h , v0 . 8 h [ 0 ]
mla v22 . 8 h , v2 . 8 h , v0 . 8 h [ 5 ]
mls v18 . 8 h , v17 . 8 h , v0 . h [ 1 ]
mls v3 . 8 h , v16 . 8 h , v0 . h [ 1 ]
mls v22 . 8 h , v23 . 8 h , v0 . h [ 4 ]
mla v18 . 8 h , v20 . 8 h , v0 . h [ 0 ]
mla v19 . 8 h , v21 . 8 h , v0 . h [ 5 ]
mla v3 . 8 h , v1 . 8 h , v0 . h [ 0 ]
mla v22 . 8 h , v2 . 8 h , v0 . h [ 5 ]
sqadd v19 . 8 h , v18 . 8 h , v19 . 8 h
sqadd v22 . 8 h , v3 . 8 h , v22 . 8 h
sqrshrun \ d0 \ ( ) . 8 b , v19 . 8 h , #7
@ -667,12 +667,12 @@ endfunc
uxtl \ s4 \ ( ) . 8 h , \ s4 \ ( ) . 8 b
uxtl \ s0 \ ( ) . 8 h , \ s0 \ ( ) . 8 b
uxtl \ s5 \ ( ) . 8 h , \ s5 \ ( ) . 8 b
mul \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . 8 h [ 2 ]
mul \ s3 \ ( ) . 8 h , \ s3 \ ( ) . 8 h , v0 . 8 h [ 3 ]
mls \ s2 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . 8 h [ 1 ]
mls \ s3 \ ( ) . 8 h , \ s4 \ ( ) . 8 h , v0 . 8 h [ 4 ]
mla \ s2 \ ( ) . 8 h , \ s0 \ ( ) . 8 h , v0 . 8 h [ 0 ]
mla \ s3 \ ( ) . 8 h , \ s5 \ ( ) . 8 h , v0 . 8 h [ 5 ]
mul \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . h [ 2 ]
mul \ s3 \ ( ) . 8 h , \ s3 \ ( ) . 8 h , v0 . h [ 3 ]
mls \ s2 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . h [ 1 ]
mls \ s3 \ ( ) . 8 h , \ s4 \ ( ) . 8 h , v0 . h [ 4 ]
mla \ s2 \ ( ) . 8 h , \ s0 \ ( ) . 8 h , v0 . h [ 0 ]
mla \ s3 \ ( ) . 8 h , \ s5 \ ( ) . 8 h , v0 . h [ 5 ]
sqadd \ s3 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , \ s3 \ ( ) . 8 h
sqrshrun \ d0 \ ( ) . 8 b , \ s3 \ ( ) . 8 h , #7
.endm
@ -685,20 +685,20 @@ endfunc
uxtl \ s4 \ ( ) . 8 h , \ s4 \ ( ) . 8 b
uxtl \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 b
uxtl \ s5 \ ( ) . 8 h , \ s5 \ ( ) . 8 b
mul \ s0 \ ( ) . 8 h , \ s0 \ ( ) . 8 h , v0 . 8 h [ 0 ]
mul v31 . 8 h , \ s3 \ ( ) . 8 h , v0 . 8 h [ 3 ]
mul \ s3 \ ( ) . 8 h , \ s3 \ ( ) . 8 h , v0 . 8 h [ 2 ]
mul \ s6 \ ( ) . 8 h , \ s6 \ ( ) . 8 h , v0 . 8 h [ 5 ]
mls \ s0 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . 8 h [ 1 ]
mls v31 . 8 h , \ s4 \ ( ) . 8 h , v0 . 8 h [ 4 ]
mls \ s3 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . 8 h [ 1 ]
mls \ s6 \ ( ) . 8 h , \ s5 \ ( ) . 8 h , v0 . 8 h [ 4 ]
mla \ s0 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . 8 h [ 2 ]
mla v31 . 8 h , \ s5 \ ( ) . 8 h , v0 . 8 h [ 5 ]
mla \ s3 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . 8 h [ 0 ]
mla \ s6 \ ( ) . 8 h , \ s4 \ ( ) . 8 h , v0 . 8 h [ 3 ]
mul \ s0 \ ( ) . 8 h , \ s0 \ ( ) . 8 h , v0 . h [ 0 ]
mul v31 . 8 h , \ s3 \ ( ) . 8 h , v0 . h [ 3 ]
mul \ s3 \ ( ) . 8 h , \ s3 \ ( ) . 8 h , v0 . h [ 2 ]
mul \ s6 \ ( ) . 8 h , \ s6 \ ( ) . 8 h , v0 . h [ 5 ]
mls \ s0 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . h [ 1 ]
mls v31 . 8 h , \ s4 \ ( ) . 8 h , v0 . h [ 4 ]
mls \ s3 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . h [ 1 ]
mls \ s6 \ ( ) . 8 h , \ s5 \ ( ) . 8 h , v0 . h [ 4 ]
mla \ s0 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . h [ 2 ]
mla v31 . 8 h , \ s5 \ ( ) . 8 h , v0 . h [ 5 ]
mla \ s3 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . h [ 0 ]
mla \ s6 \ ( ) . 8 h , \ s4 \ ( ) . 8 h , v0 . h [ 3 ]
sqadd v31 . 8 h , \ s0 \ ( ) . 8 h , v31 . 8 h
sqadd \ s6 \ ( ) . 8 h , \ s3 \ ( ) . 8 h , \ s6 \ ( ) . 8 h
sqrshrun \ d0 \ ( ) . 8 b , v31 . 8 h , #7
@ -713,10 +713,10 @@ endfunc
ext v25 . 8 b , \ v0 \ ( ) . 8 b , \ v1 \ ( ) . 8 b , #3
uxtl v22 . 8 h , v23 . 8 b
uxtl v25 . 8 h , v25 . 8 b
mul v20 . 8 h , v20 . 8 h , v0 . 8 h [ 2 ]
mul v22 . 8 h , v22 . 8 h , v0 . 8 h [ 3 ]
mls v20 . 8 h , v19 . 8 h , v0 . 8 h [ 1 ]
mls v22 . 8 h , v25 . 8 h , v0 . 8 h [ 4 ]
mul v20 . 8 h , v20 . 8 h , v0 . h [ 2 ]
mul v22 . 8 h , v22 . 8 h , v0 . h [ 3 ]
mls v20 . 8 h , v19 . 8 h , v0 . h [ 1 ]
mls v22 . 8 h , v25 . 8 h , v0 . h [ 4 ]
sqadd v22 . 8 h , v20 . 8 h , v22 . 8 h
sqrshrun \ d \ ( ) . 8 b , v22 . 8 h , #7
.endm
@ -727,14 +727,14 @@ endfunc
uxtl \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 b
uxtl \ s3 \ ( ) . 8 h , \ s3 \ ( ) . 8 b
uxtl \ s4 \ ( ) . 8 h , \ s4 \ ( ) . 8 b
mul v21 . 8 h , \ s1 \ ( ) . 8 h , v0 . 8 h [ 2 ]
mul v23 . 8 h , \ s2 \ ( ) . 8 h , v0 . 8 h [ 3 ]
mul \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . 8 h [ 2 ]
mul v22 . 8 h , \ s3 \ ( ) . 8 h , v0 . 8 h [ 3 ]
mls v21 . 8 h , \ s0 \ ( ) . 8 h , v0 . 8 h [ 1 ]
mls v23 . 8 h , \ s3 \ ( ) . 8 h , v0 . 8 h [ 4 ]
mls \ s2 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . 8 h [ 1 ]
mls v22 . 8 h , \ s4 \ ( ) . 8 h , v0 . 8 h [ 4 ]
mul v21 . 8 h , \ s1 \ ( ) . 8 h , v0 . h [ 2 ]
mul v23 . 8 h , \ s2 \ ( ) . 8 h , v0 . h [ 3 ]
mul \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v0 . h [ 2 ]
mul v22 . 8 h , \ s3 \ ( ) . 8 h , v0 . h [ 3 ]
mls v21 . 8 h , \ s0 \ ( ) . 8 h , v0 . h [ 1 ]
mls v23 . 8 h , \ s3 \ ( ) . 8 h , v0 . h [ 4 ]
mls \ s2 \ ( ) . 8 h , \ s1 \ ( ) . 8 h , v0 . h [ 1 ]
mls v22 . 8 h , \ s4 \ ( ) . 8 h , v0 . h [ 4 ]
sqadd v21 . 8 h , v21 . 8 h , v23 . 8 h
sqadd \ s2 \ ( ) . 8 h , \ s2 \ ( ) . 8 h , v22 . 8 h
sqrshrun \ d0 \ ( ) . 8 b , v21 . 8 h , #7
@ -759,7 +759,7 @@ function ff_put_vp8_epel16_v6_neon, export=1
sxtw x4 , w4
sxtw x6 , w6
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
add x6 , x17 , x6 , l s l #4 / / y
ld1 { v0 . 8 h } , [ x6 ]
1 :
@ -788,7 +788,7 @@ function ff_put_vp8_epel16_h6_neon, export=1
sxtw x5 , w5 / / x
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
add x5 , x17 , x5 , l s l #4 / / x
ld1 { v0 . 8 h } , [ x5 ]
1 :
@ -807,7 +807,7 @@ function ff_put_vp8_epel16_h6v6_neon, export=1
sub x2 , x2 , #2
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
sxtw x5 , w5 / / x
add x16 , x17 , x5 , l s l #4 / / x
sub s p , s p , #336 + 1 6
@ -854,7 +854,7 @@ function ff_put_vp8_epel8_h6v6_neon, export=1
sxtw x4 , w4
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
sxtw x5 , w5
add x5 , x17 , x5 , l s l #4 / / x
sub s p , s p , #168 + 1 6
@ -900,7 +900,7 @@ function ff_put_vp8_epel8_h4v6_neon, export=1
sxtw x4 , w4
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
sxtw x5 , w5
add x5 , x17 , x5 , l s l #4 / / x
sub s p , s p , #168 + 1 6
@ -947,7 +947,7 @@ function ff_put_vp8_epel8_h4v4_neon, export=1
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
sxtw x5 , w5
add x5 , x17 , x5 , l s l #4 / / x
sub s p , s p , #168 + 1 6
@ -992,7 +992,7 @@ function ff_put_vp8_epel8_h6v4_neon, export=1
// first pass (horizontal):
movrel x17 , s u b p e l _ f i l t e r s - 1 6
movrel x17 , s u b p e l _ f i l t e r s , - 1 6
sxtw x5 , w5
add x5 , x17 , x5 , l s l #4 / / x
sub s p , s p , #168 + 1 6