@ -536,7 +536,7 @@ endconst
/* idct4_adds type, depth: emits ff_h264_idct_add<type>_<depth>_rvv. */
/* Visible flow: a 16-entry loop (s1 counts down from 16). Each entry */
/* loads block_offset[i] through s5, forms a0 = s4 + offset and may call */
/* a DC-add helper; s5 then advances by 4 bytes and the coefficient */
/* pointer advances by 16 * 2 * (depth / 8) bytes. */
/* NOTE(review): this text reads like a patch whose +/- markers were lost. */
/* The "@ -a,b +c,d @" lines are hunk headers, so code between hunks is */
/* not shown, and several statements appear in both an old and a new form. */
/* Confirm against the real file before relying on any single line. */
.macro idct4_adds type, d e p t h
func f f _ h26 4 _ i d c t _ a d d \ t y p e \ ( ) _ \ d e p t h \ ( ) _ r v v , z v e 3 2 x
/* Write 0 to the vxrm CSR (vector fixed-point rounding mode). */
csrwi v x r m , 0
/* NOTE(review): two frame allocations back to back (96 then 64 bytes); */
/* most likely the old and the new size of one patched line. */
addi s p , s p , - 9 6
addi s p , s p , - 6 4
lla t 0 , f f _ h26 4 _ s c a n 8
sd s0 , ( s p )
li t 1 , 3 2 * ( \ d e p t h / 8 )
@ -547,14 +547,6 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
/* Spill the callee-saved registers that are used below. */
sd s3 , 3 2 ( s p )
sd s4 , 4 0 ( s p )
sd s5 , 4 8 ( s p )
sd s6 , 5 6 ( s p )
sd s7 , 6 4 ( s p )
/* Above 8-bit depth, a5 and a6 are parked in s8 and s9; s9 is later */
/* used as an indirect call target (jalr s9). */
.if \ depth > 8
sd s8 , 7 2 ( s p )
sd s9 , 8 0 ( s p )
mv s8 , a5
mv s9 , a6
.endif
/* Load 16 bytes of the ff_h264_scan8 table into v8. */
vsetivli z e r o , 1 6 , e 8 , m 1 , t a , m a
vle8 . v v8 , ( t 0 )
.if \ depth = = 8
@ -583,8 +575,8 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
/* s1 = number of entries still to process. */
li s1 , 1 6
/* s4 = incoming a0 (base added to each offset), s5 = incoming a1 */
/* (walked as the block_offset array). */
mv s4 , a0
mv s5 , a1
/* NOTE(review): the incoming a2/a3 are both copied to s6/s7 and shifted */
/* down into a1/a2; these look like two versions of the same step. */
mv s6 , a2
mv s7 , a3
mv a1 , a2
mv a2 , a3
1 :
/* t0 = low bit of s2; s2 is set up in lines that are not visible here. */
andi t 0 , s2 , 1
addi s1 , s1 , - 1
@ -594,12 +586,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
.endif
lw t 2 , ( s5 ) # block_offset[i]
/* t1 = low bit of s3, tested by the branches below. */
andi t 1 , s3 , 1
mv a1 , s6
mv a2 , s7
/* a0 = base pointer + block_offset[i]. */
add a0 , s4 , t 2
.if \ depth > 8
mv a5 , s8
.endif
.ifc \ type, 1 6
bnez t 1 , 2 f # if (nnz == 1 && block[i * 16])
.else
@ -611,23 +598,13 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
.ifnc \ type, 1 6
beqz t 1 , 3 f # if (block[i * 16])
.endif
/* NOTE(review): three DC-add call forms follow (C helper, indirect call */
/* through s9, RVV helper); presumably only one belongs to a given */
/* revision of the file. */
.if \ depth = = 8
call f f _ h26 4 _ i d c t _ d c _ a d d _ \ d e p t h \ ( ) _ c
.else
jalr s9
.endif
jal f f _ h26 4 _ i d c t 4 _ d c _ a d d _ \ d e p t h \ ( ) _ r v v
3 :
/* Next entry: shift s3 right by one bit, step s5 to the next 32-bit */
/* offset and move the coefficient pointer past one block. */
srli s3 , s3 , 1
addi s5 , s5 , 4
/* NOTE(review): the pointer advance appears for both s6 and a1. */
addi s6 , s6 , 1 6 * 2 * ( \ d e p t h / 8 )
addi a1 , a1 , 1 6 * 2 * ( \ d e p t h / 8 )
bnez s1 , 1 b
/* Restore the saved registers and release the frame. */
.if \ depth > 8
ld s9 , 8 0 ( s p )
ld s8 , 7 2 ( s p )
.endif
ld s7 , 6 4 ( s p )
ld s6 , 5 6 ( s p )
ld s5 , 4 8 ( s p )
ld s4 , 4 0 ( s p )
ld s3 , 3 2 ( s p )
@ -635,7 +612,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
ld s1 , 1 6 ( s p )
ld r a , 8 ( s p )
ld s0 , 0 ( s p )
/* NOTE(review): two frame releases (96 and 64 bytes), mirroring the */
/* doubled allocation at the top. */
addi s p , s p , 9 6
addi s p , s p , 6 4
ret
endfunc
.endm
@ -646,7 +623,7 @@ idct4_adds 16intra, \depth
/* ff_h264_idct8_add4_<depth>_rvv. */
/* Visible flow: a 4-entry loop (s1 counts down from 4). Each entry loads */
/* block_offset[i] through s5, forms a0 = s4 + offset and calls either the */
/* local .Lidct8_add helper or a DC-add helper; s5 then advances by 4 * 4 */
/* bytes and the coefficient pointer by 4 * 16 * 2 * (depth / 8) bytes. */
/* NOTE(review): this text reads like a patch whose +/- markers were lost. */
/* The "@ -a,b +c,d @" lines are hunk headers, so code between hunks is */
/* not shown, and several statements appear in both an old and a new form. */
/* Confirm against the real file before relying on any single line. */
func f f _ h26 4 _ i d c t 8 _ a d d4 _ \ d e p t h \ ( ) _ r v v , z v e 3 2 x
/* Write 0 to the vxrm CSR (vector fixed-point rounding mode). */
csrwi v x r m , 0
/* NOTE(review): two frame allocations back to back (96 then 64 bytes); */
/* most likely the old and the new size of one patched line. */
addi s p , s p , - 9 6
addi s p , s p , - 6 4
lla t 0 , f f _ h26 4 _ s c a n 8
sd s0 , ( s p )
li t 1 , 4 * 3 2 * ( \ d e p t h / 8 )
@ -658,14 +635,6 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
/* Spill the callee-saved registers that are used below. */
sd s3 , 3 2 ( s p )
sd s4 , 4 0 ( s p )
sd s5 , 4 8 ( s p )
sd s6 , 5 6 ( s p )
sd s7 , 6 4 ( s p )
/* Above 8-bit depth, a5 and a6 are parked in s8 and s9; s9 is later */
/* used as an indirect call target (jalr s9). */
.if \ depth > 8
sd s8 , 7 2 ( s p )
sd s9 , 8 0 ( s p )
mv s8 , a5
mv s9 , a6
.endif
/* Strided load of 4 bytes from the ff_h264_scan8 table into v8; the */
/* stride in t2 is set in lines that are not visible here. */
vsetivli z e r o , 4 , e 8 , m f4 , t a , m a
vlse8 . v v8 , ( t 0 ) , t 2
.if \ depth = = 8
@ -689,8 +658,8 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
/* s1 = number of entries still to process. */
li s1 , 4
/* s4 = incoming a0 (base added to each offset), s5 = incoming a1 */
/* (walked as the block_offset array). */
mv s4 , a0
mv s5 , a1
/* NOTE(review): the incoming a2/a3 are both copied to s6/s7 and shifted */
/* down into a1/a2; these look like two versions of the same step. */
mv s6 , a2
mv s7 , a3
mv a1 , a2
mv a2 , a3
1 :
/* t0 = low bit of s2; s2 is set up in lines that are not visible here. */
andi t 0 , s2 , 1
addi s1 , s1 , - 1
@ -698,33 +667,23 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
beqz t 0 , 3 f # if (nnz)
lw t 2 , ( s5 ) # block_offset[i]
/* t1 = low bit of s3, selects the DC-only path at label 2. */
andi t 1 , s3 , 1
mv a1 , s6
mv a2 , s7
/* a0 = base pointer + block_offset[i]. */
add a0 , s4 , t 2
.if \ depth > 8
mv a5 , s8
.endif
bnez t 1 , 2 f # if (nnz == 1 && block[i * 16])
/* Otherwise call the local .Lidct8_add helper. */
jal . L i d c t 8 _ a d d _ \ d e p t h \ ( ) _ r v v
j 3 f
/* NOTE(review): label 2 is defined twice below, each followed by a */
/* different DC-add call form; presumably only one belongs to a given */
/* revision of the file. */
2 :
.if \ depth = = 8
call f f _ h26 4 _ i d c t 8 _ d c _ a d d _ \ d e p t h \ ( ) _ c
j 3 f
.else
jalr s9
j 4 f # idct8_add_16 updates a1
.endif
2 :
jal f f _ h26 4 _ i d c t 8 _ d c _ a d d _ \ d e p t h \ ( ) _ r v v
3 :
/* Move a1 past this block; the jump to label 4 above skips this step. */
addi a1 , a1 , 4 * 1 6 * 2 * ( \ d e p t h / 8 )
4 :
/* Next entry: shift s3 right by one bit and step s5 forward by four */
/* 32-bit offsets. */
srli s3 , s3 , 1
addi s5 , s5 , 4 * 4
/* NOTE(review): a second pointer advance, this time on s6. */
addi s6 , s6 , 4 * 1 6 * 2 * ( \ d e p t h / 8 )
bnez s1 , 1 b
/* Restore the saved registers and release the frame. */
.if \ depth > 8
ld s9 , 8 0 ( s p )
ld s8 , 7 2 ( s p )
.endif
ld s7 , 6 4 ( s p )
ld s6 , 5 6 ( s p )
ld s5 , 4 8 ( s p )
ld s4 , 4 0 ( s p )
ld s3 , 3 2 ( s p )
@ -732,7 +691,7 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
ld s1 , 1 6 ( s p )
ld r a , 8 ( s p )
ld s0 , 0 ( s p )
/* NOTE(review): two frame releases (96 and 64 bytes), mirroring the */
/* doubled allocation at the top. */
addi s p , s p , 9 6
addi s p , s p , 6 4
ret
endfunc
.endr
@ -740,19 +699,16 @@ endfunc
/*
 * Entry points for 9-, 10-, 12- and 14-bit sample depths.
 *
 * Each function is a three-instruction trampoline. It loads two extra
 * values and then jumps (plain `j`, so ra still points at the original
 * caller) to the matching 16-bit function, which does the work and returns
 * directly. No register other than a5 and a6 is written here.
 *
 *   a5 = (1 << depth) - 1, i.e. a mask with the low `depth` bits set
 *        (presumably the largest valid sample value; confirm in the callee)
 *   a6 = address of the C DC-only add routine for this depth
 *
 * NOTE(review): a6 only matters if the 16-bit functions still make an
 * indirect call through it (the `mv s9, a6` / `jalr s9` form). If they call
 * an RVV DC-add helper directly, the `lla a6` lines are dead and can go.
 */
.irp    depth, 9, 10, 12, 14
func ff_h264_idct_add16_\depth\()_rvv, zve32x
        li      a5, (1 << \depth) - 1
        lla     a6, ff_h264_idct_dc_add_\depth\()_c
        j       ff_h264_idct_add16_16_rvv
endfunc

func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
        li      a5, (1 << \depth) - 1
        lla     a6, ff_h264_idct_dc_add_\depth\()_c
        j       ff_h264_idct_add16intra_16_rvv
endfunc

func ff_h264_idct8_add4_\depth\()_rvv, zve32x
        li      a5, (1 << \depth) - 1
        lla     a6, ff_h264_idct8_dc_add_\depth\()_c
        j       ff_h264_idct8_add4_16_rvv
endfunc
.endr