@ -2169,7 +2169,8 @@ function hevc_put_hevc_qpel_uni_hv4_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v6 _ 8 _ e n d _ n e o n
@ -2198,7 +2199,8 @@ function hevc_put_hevc_qpel_uni_hv6_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ e n d _ n e o n
@ -2225,7 +2227,8 @@ function hevc_put_hevc_qpel_uni_hv8_8_end_neon
.endm
1 : calc_ a l l
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v12 _ 8 _ e n d _ n e o n
@ -2252,7 +2255,8 @@ function hevc_put_hevc_qpel_uni_hv12_8_end_neon
.endm
1 : calc_ a l l 2
.purgem calc
2 : ret
2 : mov s p , x14
ret
endfunc
function h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ e n d _ n e o n
@ -2286,21 +2290,17 @@ function hevc_put_hevc_qpel_uni_hv16_8_end_neon
add s p , s p , #32
subs w7 , w7 , #16
b. n e 0 b
add w10 , w4 , #6
add s p , s p , x12 / / d i s c a r d r e s t o f f i r s t l i n e
lsl x10 , x10 , #7
add s p , s p , x10 / / t m p _ a r r a y w i t h o u t f i r s t l i n e
mov s p , x14
ret
endfunc
# if H A V E _ I 8 M M
ENABLE_ I 8 M M
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v4 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
.macro qpel_uni_hv suffix
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v4 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
lsl x10 , x10 , #7
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
str x30 , [ s p , #- 48 ] !
stp x30 , x14 , [ s p , #- 48 ] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
sub x1 , x2 , x3 , l s l #1
@ -2309,18 +2309,19 @@ function ff_hevc_put_hevc_qpel_uni_hv4_8_neon_i8mm, export=1
mov x2 , x3
add x3 , x4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h4 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h4 _ 8 _ \ s u f f i x )
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldr x30 , [ s p ] , #48
ldp x30 , x14 , [ s p ] , #48
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v4 _ 8 _ e n d _ n e o n
endfunc
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v6 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v6 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
lsl x10 , x10 , #7
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
str x30 , [ s p , #- 48 ] !
stp x30 , x14 , [ s p , #- 48 ] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
sub x1 , x2 , x3 , l s l #1
@ -2329,18 +2330,19 @@ function ff_hevc_put_hevc_qpel_uni_hv6_8_neon_i8mm, export=1
mov x2 , x3
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h6 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h6 _ 8 _ \ s u f f i x )
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldr x30 , [ s p ] , #48
ldp x30 , x14 , [ s p ] , #48
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v6 _ 8 _ e n d _ n e o n
endfunc
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
lsl x10 , x10 , #7
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
str x30 , [ s p , #- 48 ] !
stp x30 , x14 , [ s p , #- 48 ] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
sub x1 , x2 , x3 , l s l #1
@ -2349,60 +2351,67 @@ function ff_hevc_put_hevc_qpel_uni_hv8_8_neon_i8mm, export=1
mov x2 , x3
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h8 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h8 _ 8 _ \ s u f f i x )
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldr x30 , [ s p ] , #48
ldp x30 , x14 , [ s p ] , #48
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ e n d _ n e o n
endfunc
// ff_hevc_put_hevc_qpel_uni_hv12_8_<suffix>: carves a temporary array out of
// the stack, calls the matching ff_hevc_put_hevc_qpel_h12_8_<suffix> routine
// with x0 pointing just above the register save area, then tail-branches to
// hevc_put_hevc_qpel_uni_hv12_8_end_neon.
// NOTE(review): this span reads like unified-diff residue with the +/- markers
// stripped: there are two `function` headers and several instructions repeated
// with different immediates (#7/#8, #-48/#-64, #48/#64). Presumably the first
// line of each pair is the removed one and the second the added one - confirm
// against the real source file before assembling.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v12 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v12 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
// x10 = w10 << 7: number of bytes reserved below sp for the temporary array.
lsl x10 , x10 , #7
// x14 keeps the sp value from function entry.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
// Save area below the temporary array: x7/x30 at +0, x4/x6 at +16, x0/x1 at
// +32, and (64-byte variant only) x14 at +48.
stp x7 , x30 , [ s p , #- 48 ] !
stp x7 , x30 , [ s p , #- 6 4] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x14 , [ s p , #48 ]
// x1 = x2 - 3 * x3 (two steps: minus 2*x3, then minus x3).
sub x1 , x2 , x3 , l s l #1
sub x1 , x1 , x3
mov x2 , x3
// x0 = first address past the save area (save-area size is #48 or #64).
add x0 , s p , #48
add x0 , s p , #6 4
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h12 _ 8 _ n e o n _ i 8 m m )
// w6 is overwritten here; the caller's x6 was saved at [sp, #16] above and is
// reloaded after the call. Presumably w6 is a width argument for the callee -
// verify against the callee's definition.
mov w6 , #12
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h12 _ 8 _ \ s u f f i x )
// Reload everything that was saved before the call and release the save area.
ldr x14 , [ s p , #48 ]
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldp x7 , x30 , [ s p ] , #48
ldp x7 , x30 , [ s p ] , #6 4
// Plain branch (not bl): x30 has been restored, so control does not return here.
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v12 _ 8 _ e n d _ n e o n
endfunc
// ff_hevc_put_hevc_qpel_uni_hv16_8_<suffix>: reserves a temporary array on the
// stack, calls ff_hevc_put_hevc_qpel_h16_8_<suffix> with x0 pointing just above
// the register save area, then tail-branches to
// hevc_put_hevc_qpel_uni_hv16_8_end_neon.
// NOTE(review): this span looks like a unified diff with the +/- markers
// stripped (duplicate `function` header, instruction pairs differing only in
// their immediates, two `bl` targets). Presumably the first line of each pair
// is the removed one - confirm against the real source file.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
// x10 = w10 << 7: byte size of the temporary array reserved below sp.
lsl x10 , x10 , #7
// Remember the entry sp in x14.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
// Register save area: x7/x30, x4/x6, x0/x1, plus x14 at +48 in the 64-byte form.
stp x7 , x30 , [ s p , #- 48 ] !
stp x7 , x30 , [ s p , #- 6 4] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
add x0 , s p , #48
str x14 , [ s p , #48 ]
// x0 = first address past the save area.
add x0 , s p , #64
// x1 = x2 - 3 * x3.
sub x1 , x2 , x3 , l s l #1
sub x1 , x1 , x3
mov x2 , x3
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h16 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h16 _ 8 _ \ s u f f i x )
// Restore the saved registers and pop the save area.
ldr x14 , [ s p , #48 ]
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldp x7 , x30 , [ s p ] , #48
ldp x7 , x30 , [ s p ] , #6 4
// Tail branch: x30 was restored above, so control does not come back here.
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ e n d _ n e o n
endfunc
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v24 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v24 _ 8 _ \ s u f f i x , e x p o r t =1
stp x4 , x5 , [ s p , #- 64 ] !
stp x2 , x3 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
stp x6 , x30 , [ s p , #48 ]
mov x7 , #16
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ \ s u f f i x )
ldp x2 , x3 , [ s p , #16 ]
add x2 , x2 , #16
ldp x0 , x1 , [ s p , #32 ]
@ -2410,71 +2419,100 @@ function ff_hevc_put_hevc_qpel_uni_hv24_8_neon_i8mm, export=1
mov x7 , #8
add x0 , x0 , #16
ldr x6 , [ s p ]
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ n e o n _ i 8 m m )
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v8 _ 8 _ \ s u f f i x )
ldr x30 , [ s p , #8 ]
add s p , s p , #16
ret
endfunc
// ff_hevc_put_hevc_qpel_uni_hv32_8_<suffix>: reserves a temporary array on the
// stack, calls ff_hevc_put_hevc_qpel_h32_8_<suffix> with x0 pointing just above
// the register save area, then tail-branches to the hv16 end routine
// (hevc_put_hevc_qpel_uni_hv16_8_end_neon), which this width shares.
// NOTE(review): this span looks like a unified diff with the +/- markers
// stripped (duplicate `function` header, instruction pairs differing only in
// their immediates, two `bl` targets). Presumably the first line of each pair
// is the removed one - confirm against the real source file.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v32 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v32 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
// x10 = w10 << 7: byte size of the temporary array reserved below sp.
lsl x10 , x10 , #7
// Remember the entry sp in x14.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
// Register save area: x7/x30, x4/x6, x0/x1, plus x14 at +48 in the 64-byte form.
stp x7 , x30 , [ s p , #- 48 ] !
stp x7 , x30 , [ s p , #- 6 4] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x14 , [ s p , #48 ]
// x1 = x2 - 3 * x3; the x0 setup is interleaved between the two subtractions.
sub x1 , x2 , x3 , l s l #1
add x0 , s p , #48
add x0 , s p , #6 4
sub x1 , x1 , x3
mov x2 , x3
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ n e o n _ i 8 m m )
// w6 is overwritten; the caller's x6 sits at [sp, #16] and is reloaded after
// the call. Presumably a width argument for the callee - verify.
mov w6 , #32
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ \ s u f f i x )
// Restore the saved registers and pop the save area.
ldr x14 , [ s p , #48 ]
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldp x7 , x30 , [ s p ] , #48
ldp x7 , x30 , [ s p ] , #6 4
// Tail branch: x30 was restored above, so control does not come back here.
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ e n d _ n e o n
endfunc
// ff_hevc_put_hevc_qpel_uni_hv48_8_<suffix>: reserves a temporary array on the
// stack, calls a horizontal qpel routine with x0 pointing just above the
// register save area, then tail-branches to
// hevc_put_hevc_qpel_uni_hv16_8_end_neon.
// NOTE(review): this span looks like a unified diff with the +/- markers
// stripped (duplicate `function` header, instruction pairs differing only in
// their immediates, an unconditional `bl` followed by an .ifc/.else pair of
// `bl`s). Presumably the first line of each pair is the removed one - confirm
// against the real source file.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v48 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v48 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
// x10 = w10 << 7: byte size of the temporary array reserved below sp.
lsl x10 , x10 , #7
// Remember the entry sp in x14.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
// Register save area: x7/x30, x4/x6, x0/x1, plus x14 at +48 in the 64-byte form.
stp x7 , x30 , [ s p , #- 48 ] !
stp x7 , x30 , [ s p , #- 6 4] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
str x14 , [ s p , #48 ]
// x1 = x2 - 3 * x3.
sub x1 , x2 , x3 , l s l #1
sub x1 , x1 , x3
mov x2 , x3
// x0 = first address past the save area.
add x0 , s p , #48
add x0 , s p , #6 4
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h48 _ 8 _ n e o n _ i 8 m m )
// When the macro argument is exactly `neon`, w6 is set to 48 and the h32
// routine is called; any other suffix calls an h48 routine directly.
// Presumably the plain-neon h32 routine takes a width in w6 - verify against
// its definition.
.ifc \ suffix, n e o n
mov w6 , #48
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ \ s u f f i x )
.else
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h48 _ 8 _ \ s u f f i x )
.endif
// Restore the saved registers (including the caller's x6) and pop the save area.
ldr x14 , [ s p , #48 ]
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldp x7 , x30 , [ s p ] , #48
ldp x7 , x30 , [ s p ] , #6 4
// Tail branch: x30 was restored above, so control does not come back here.
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ e n d _ n e o n
endfunc
// ff_hevc_put_hevc_qpel_uni_hv64_8_<suffix>: reserves a temporary array on the
// stack, calls a horizontal qpel routine with x0 pointing just above the
// register save area, then tail-branches to
// hevc_put_hevc_qpel_uni_hv16_8_end_neon.
// NOTE(review): this span looks like a unified diff with the +/- markers
// stripped (duplicate `function` header, instruction pairs differing only in
// their immediates, an unconditional `bl` followed by an .ifc/.else pair of
// `bl`s). Presumably the first line of each pair is the removed one - confirm
// against the real source file.
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v64 _ 8 _ n e o n _ i 8 m m , e x p o r t =1
add w10 , w4 , #7
function f f _ h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v64 _ 8 _ \ s u f f i x , e x p o r t =1
add w10 , w4 , #8
// x10 = w10 << 7: byte size of the temporary array reserved below sp.
lsl x10 , x10 , #7
// Remember the entry sp in x14.
mov x14 , s p
sub s p , s p , x10 / / t m p _ a r r a y
// Register save area: x7/x30, x4/x6, x0/x1, plus x14 at +48 in the 64-byte form.
stp x7 , x30 , [ s p , #- 48 ] !
stp x7 , x30 , [ s p , #- 6 4] !
stp x4 , x6 , [ s p , #16 ]
stp x0 , x1 , [ s p , #32 ]
add x0 , s p , #48
str x14 , [ s p , #48 ]
// x0 = first address past the save area.
add x0 , s p , #64
// x1 = x2 - 3 * x3; the x2 = x3 copy sits between the two subtractions.
sub x1 , x2 , x3 , l s l #1
mov x2 , x3
sub x1 , x1 , x3
add w3 , w4 , #7
mov x4 , x5
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h64 _ 8 _ n e o n _ i 8 m m )
// When the macro argument is exactly `neon`, w6 is set to 64 and the h32
// routine is called; any other suffix calls an h64 routine directly.
// Presumably the plain-neon h32 routine takes a width in w6 - verify against
// its definition.
.ifc \ suffix, n e o n
mov w6 , #64
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h32 _ 8 _ \ s u f f i x )
.else
bl X ( f f _ h e v c _ p u t _ h e v c _ q p e l _ h64 _ 8 _ \ s u f f i x )
.endif
// Restore the saved registers (including the caller's x6) and pop the save area.
ldr x14 , [ s p , #48 ]
ldp x4 , x6 , [ s p , #16 ]
ldp x0 , x1 , [ s p , #32 ]
ldp x7 , x30 , [ s p ] , #48
ldp x7 , x30 , [ s p ] , #6 4
// Tail branch: x30 was restored above, so control does not come back here.
b h e v c _ p u t _ h e v c _ q p e l _ u n i _ h v16 _ 8 _ e n d _ n e o n
endfunc
.endm
// Expand the qpel_uni_hv macro once with suffix `neon`; this expansion is
// unconditional.
qpel_ u n i _ h v n e o n
// Expand it a second time with suffix `neon_i8mm`, only when HAVE_I8MM is set,
// bracketed by ENABLE_I8MM / DISABLE_I8MM.
# if H A V E _ I 8 M M
ENABLE_ I 8 M M
qpel_ u n i _ h v n e o n _ i 8 m m
DISABLE_ I 8 M M
# endif