@ -3804,7 +3804,8 @@ function hevc_put_hevc_qpel_hv4_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ h v6 _ 8 _ e n d _ n e o n
@ -3831,7 +3832,8 @@ function hevc_put_hevc_qpel_hv6_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ h v8 _ 8 _ e n d _ n e o n
@ -3857,7 +3859,8 @@ function hevc_put_hevc_qpel_hv8_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ e n d _ n e o n
@ -3882,7 +3885,8 @@ function hevc_put_hevc_qpel_hv12_8_end_neon
.endm
1 : calc_ a l l 2
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ h v16 _ 8 _ e n d _ n e o n
@ -3906,7 +3910,8 @@ function hevc_put_hevc_qpel_hv16_8_end_neon
.endm
1 : calc_ a l l 2
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ e n d _ n e o n
@ -3937,162 +3942,187 @@ function hevc_put_hevc_qpel_hv32_8_end_neon
add s p , s p , #32
subs w6 , w6 , #16
b. h i 0 b
add w10 , w3 , #6
add s p , s p , #64 / / d i s c a r d r e s t o f f i r s t l i n e
lsl x10 , x10 , #7
add s p , s p , x10 / / t m p _ a r r a y w i t h o u t f i r s t l i n e
mov s p , x14
ret
endfunc
# if H A V E _ I 8 M M
ENABLE_ I 8 M M
// qpel hv, width 4: reserve a tmp_array on the stack, call the matching
// qpel_h4 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv4_8_end_neon.
// NOTE(review): this span is patch residue with the +/- markers lost. Several
// instructions appear twice with different immediates (#7/#8, #-32/#-48,
// #32/#48), and both the _neon_i8mm and the macro-suffix spellings of the
// function header and call target are present. Presumably the first line of
// each pair is the removed one and the second the added one - confirm against
// the upstream patch before assembling.
// NOTE(review): x1/x2/x3 look like source pointer, stride and height - TODO confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v4 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
// Opens the qpel_hv macro; its single argument is used as the function-name suffix.
.macro qpel_hv suffix
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v4 _ 8 _ \ s u f f i x , e x p o r t =1
// x10 = (w3 + 8) << 7 bytes are reserved below (with the older line: w3 + 7).
add w10 , w3 , #8
mov x7 , #128
lsl x10 , x10 , #7
// Keep the entry sp in x14 so it can be recovered after the variable-sized reservation.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
add x0 , s p , #32
// Spill x14 into the frame across the call; presumably the callee may clobber it.
str x14 , [ s p , #32 ]
// x0 = first byte above the 48-byte save frame, i.e. the bottom of the reserved area.
add x0 , s p , #48
// x1 -= 3 * x2 (done as x2 << 1, then x2); x3 += 7.
sub x1 , x1 , x2 , l s l #1
add x3 , x3 , #7
sub x1 , x1 , x2
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h4 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h4 _ 8 _ \ s u f f i x )
// Restore the saved registers and pop the save frame.
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link; the visible tail of that routine does "mov sp, x14" before ret.
b h e v c _ p u t _ h e v c _ q p e l _ h v4 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 6: reserve a tmp_array on the stack, call the matching
// qpel_h6 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv6_8_end_neon.
// NOTE(review): patch residue - removed and added lines are interleaved
// (two function headers, #7/#8, #-32/#-48, #32/#48, two bl targets).
// Presumably the second line of each pair is the current one - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v6 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v6 _ 8 _ \ s u f f i x , e x p o r t =1
// Reservation size: (w3 + 8) << 7 bytes (older line used w3 + 7).
add w10 , w3 , #8
mov x7 , #128
lsl x10 , x10 , #7
// Entry sp is kept in x14 before sp is lowered by a run-time amount.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
add x0 , s p , #32
// x14 is stored in the frame so it can be reloaded after the call.
str x14 , [ s p , #32 ]
// x0 = bottom of the reserved area (just above the 48-byte save frame).
add x0 , s p , #48
// x1 -= 3 * x2; x3 += 7.
sub x1 , x1 , x2 , l s l #1
add x3 , x3 , #7
sub x1 , x1 , x2
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h6 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h6 _ 8 _ \ s u f f i x )
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link into the shared width-6 vertical stage.
b h e v c _ p u t _ h e v c _ q p e l _ h v6 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 8: reserve a tmp_array on the stack, call the matching
// qpel_h8 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv8_8_end_neon.
// NOTE(review): patch residue - removed and added lines are interleaved
// (two function headers, #7/#8, #-32/#-48, #32/#48, two bl targets).
// Presumably the second line of each pair is the current one - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v8 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v8 _ 8 _ \ s u f f i x , e x p o r t =1
// Reservation size: (w3 + 8) << 7 bytes (older line used w3 + 7).
add w10 , w3 , #8
lsl x10 , x10 , #7
// First half of x1 -= 3 * x2; the remaining "sub x1, x1, x2" follows further down.
sub x1 , x1 , x2 , l s l #1
// Entry sp is kept in x14 before sp is lowered by a run-time amount.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
add x0 , s p , #32
// x14 is stored in the frame so it can be reloaded after the call.
str x14 , [ s p , #32 ]
// x0 = bottom of the reserved area (just above the 48-byte save frame).
add x0 , s p , #48
add x3 , x3 , #7
sub x1 , x1 , x2
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h8 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h8 _ 8 _ \ s u f f i x )
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link into the shared width-8 vertical stage.
b h e v c _ p u t _ h e v c _ q p e l _ h v8 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 12: reserve a tmp_array on the stack, call the matching
// qpel_h12 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv12_8_end_neon.
// NOTE(review): patch residue - removed and added lines are interleaved
// (two function headers, #7/#8, #-32/#-48, #32/#48, two bl targets).
// Presumably the second line of each pair is the current one - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ \ s u f f i x , e x p o r t =1
// Reservation size: (w3 + 8) << 7 bytes (older line used w3 + 7).
add w10 , w3 , #8
lsl x10 , x10 , #7
// First half of x1 -= 3 * x2; the remaining "sub x1, x1, x2" follows further down.
sub x1 , x1 , x2 , l s l #1
// Entry sp is kept in x14 before sp is lowered by a run-time amount.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
add x0 , s p , #32
// x14 is stored in the frame so it can be reloaded after the call.
str x14 , [ s p , #32 ]
// x0 = bottom of the reserved area (just above the 48-byte save frame).
add x0 , s p , #48
add x3 , x3 , #7
sub x1 , x1 , x2
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h12 _ 8 _ n e o n _ i 8 m m )
// w6 = 12 is set immediately before the call; presumably a width argument - TODO confirm.
mov w6 , #12
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h12 _ 8 _ \ s u f f i x )
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link into the shared width-12 vertical stage.
b h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 16: reserve a tmp_array on the stack, call the matching
// qpel_h16 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv16_8_end_neon.
// NOTE(review): patch residue - removed and added lines are interleaved
// (two function headers, #7/#8, #-32/#-48, #32/#48, two bl targets).
// Presumably the second line of each pair is the current one - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v16 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v16 _ 8 _ \ s u f f i x , e x p o r t =1
// Reservation size: (w3 + 8) << 7 bytes (older line used w3 + 7).
add w10 , w3 , #8
lsl x10 , x10 , #7
// First half of x1 -= 3 * x2; the remaining "sub x1, x1, x2" follows further down.
sub x1 , x1 , x2 , l s l #1
// Entry sp is kept in x14 before sp is lowered by a run-time amount.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
// x14 is stored in the frame so it can be reloaded after the call.
str x14 , [ s p , #32 ]
add x3 , x3 , #7
add x0 , s p , #32
// x0 = bottom of the reserved area (just above the 48-byte save frame).
add x0 , s p , #48
sub x1 , x1 , x2
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h16 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h16 _ 8 _ \ s u f f i x )
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link into the shared width-16 vertical stage.
b h e v c _ p u t _ h e v c _ q p e l _ h v16 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 24: runs the width-12 hv routine twice, the second time with
// x1 advanced by 12 and x0 advanced by 24.
// NOTE(review): patch residue - the function header and each bl appear twice
// (_neon_i8mm spelling, then macro-suffix spelling); presumably only the
// second of each pair is current - confirm.
// NOTE(review): x0 += 24 for x1 += 12 suggests 2-byte destination elements - TODO confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ \ s u f f i x , e x p o r t =1
// 64-byte frame: x4/x5 at +0, x2/x3 at +16, x0/x1 at +32, x30 at +48.
stp x4 , x5 , [ s p , #- 64 ] !
stp x2 , x3 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x30 , [ s p , #48 ]
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ \ s u f f i x )
// Reload the arguments for the second call; the post-index of 48 leaves sp
// pointing at the saved x30 slot.
ldp x0 , x1 , [ s p , #32 ]
ldp x2 , x3 , [ s p , #16 ]
ldp x4 , x5 , [ s p ] , #48
add x1 , x1 , #12
add x0 , x0 , #24
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v12 _ 8 _ \ s u f f i x )
// Pop the remaining 16 bytes and recover the return address.
ldr x30 , [ s p ] , #16
ret
endfunc
// qpel hv, width 32: reserve a tmp_array on the stack, call the matching
// qpel_h32 routine with x0 pointing at that array, then branch to
// hevc_put_hevc_qpel_hv32_8_end_neon.
// NOTE(review): patch residue - removed and added lines are interleaved
// (two function headers, #7/#8, #-32/#-48, #32/#48, two bl targets).
// Presumably the second line of each pair is the current one - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w3 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ \ s u f f i x , e x p o r t =1
// Reservation size: (w3 + 8) << 7 bytes (older line used w3 + 7).
add w10 , w3 , #8
// x1 -= 3 * x2, split around the shift below.
sub x1 , x1 , x2 , l s l #1
lsl x10 , x10 , #7
sub x1 , x1 , x2
// Entry sp is kept in x14 before sp is lowered by a run-time amount.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
stp x5 , x30 , [ s p , #- 32 ] !
stp x5 , x30 , [ s p , #- 48 ] !
stp x0 , x3 , [ s p , #16 ]
// x14 is stored in the frame so it can be reloaded after the call.
str x14 , [ s p , #32 ]
add x3 , x3 , #7
add x0 , s p , #32
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ n e o n _ i 8 m m )
// x0 = bottom of the reserved area (just above the 48-byte save frame).
add x0 , s p , #48
// w6 = 32 is set immediately before the call; presumably a width argument - TODO confirm.
mov w6 , #32
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ \ s u f f i x )
ldr x14 , [ s p , #32 ]
ldp x0 , x3 , [ s p , #16 ]
ldp x5 , x30 , [ s p ] , #32
ldp x5 , x30 , [ s p ] , #48
// Branch without link into the shared width-32 vertical stage.
b h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ e n d _ n e o n
endfunc
// qpel hv, width 48: runs the width-24 hv routine twice, the second time with
// x1 advanced by 24 and x0 advanced by 48.
// NOTE(review): patch residue - the function header and each bl appear twice
// (_neon_i8mm spelling, then macro-suffix spelling); presumably only the
// second of each pair is current - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v48 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v48 _ 8 _ \ s u f f i x , e x p o r t =1
// 64-byte frame: x4/x5 at +0, x2/x3 at +16, x0/x1 at +32, x30 at +48.
stp x4 , x5 , [ s p , #- 64 ] !
stp x2 , x3 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x30 , [ s p , #48 ]
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ \ s u f f i x )
// Reload the arguments for the second call; the post-index of 48 leaves sp
// pointing at the saved x30 slot.
ldp x0 , x1 , [ s p , #32 ]
ldp x2 , x3 , [ s p , #16 ]
ldp x4 , x5 , [ s p ] , #48
add x1 , x1 , #24
add x0 , x0 , #48
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v24 _ 8 _ \ s u f f i x )
// Pop the remaining 16 bytes and recover the return address.
ldr x30 , [ s p ] , #16
ret
endfunc
// qpel hv, width 64: runs the width-32 hv routine twice, the second time with
// x1 advanced by 32 and x0 advanced by 64. x6 is loaded with 32 before each call.
// NOTE(review): patch residue - the function header and each bl appear twice
// (_neon_i8mm spelling, then macro-suffix spelling); presumably only the
// second of each pair is current - confirm.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v64 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
function f f _ h e v c _ p u t _ h e v c _ q p e l _ h v64 _ 8 _ \ s u f f i x , e x p o r t =1
// 64-byte frame: x4/x5 at +0, x2/x3 at +16, x0/x1 at +32, x30 at +48.
stp x4 , x5 , [ s p , #- 64 ] !
stp x2 , x3 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x30 , [ s p , #48 ]
mov x6 , #32
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ \ s u f f i x )
// Reload the arguments for the second call; the post-index of 48 leaves sp
// pointing at the saved x30 slot.
ldp x0 , x1 , [ s p , #32 ]
ldp x2 , x3 , [ s p , #16 ]
ldp x4 , x5 , [ s p ] , #48
add x1 , x1 , #32
add x0 , x0 , #64
// x6 is set again because it is not among the registers saved in the frame.
mov x6 , #32
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h v32 _ 8 _ \ s u f f i x )
// Pop the remaining 16 bytes and recover the return address.
ldr x30 , [ s p ] , #16
ret
endfunc
// Closes the qpel_hv macro, then instantiates it: once with the suffix "neon",
// and, only when HAVE_I8MM is set, once more with the suffix "neon_i8mm"
// between ENABLE_I8MM and DISABLE_I8MM.
.endm
qpel_ h v n e o n
# if H A V E _ I 8 M M
ENABLE_ I 8 M M
qpel_ h v n e o n _ i 8 m m
DISABLE_ I 8 M M
# endif