swscale/rgb2rgb: avoid S-regs in RISC-V V uyvytoyuv422

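We can now make do with temporary (caller-saved) registers only, so s0 no
longer needs to be saved to and restored from the stack.
As an added bonus, this makes the code XLEN-independent: the RV64-only
sd/ld instructions used for that spill are gone, so the __riscv_xlen == 64
guards can be dropped.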
pull/390/head
Rémi Denis-Courmont 2 years ago
parent be37a2e364
commit e50f8e861b
  1. 2
      libswscale/riscv/rgb2rgb.c
  2. 10
      libswscale/riscv/rgb2rgb_rvv.S

@ -55,10 +55,8 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv; shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv; shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
interleaveBytes = ff_interleave_bytes_rvv; interleaveBytes = ff_interleave_bytes_rvv;
#if (__riscv_xlen == 64)
uyvytoyuv422 = ff_uyvytoyuv422_rvv; uyvytoyuv422 = ff_uyvytoyuv422_rvv;
yuyvtoyuv422 = ff_yuyvtoyuv422_rvv; yuyvtoyuv422 = ff_yuyvtoyuv422_rvv;
#endif
} }
#endif #endif
} }

@ -100,12 +100,9 @@ func ff_interleave_bytes_rvv, zve32x
ret ret
endfunc endfunc
#if (__riscv_xlen == 64)
.macro yuy2_to_i422p y_shift .macro yuy2_to_i422p y_shift
addi sp, sp, -16
sd s0, (sp)
addi a4, a4, 1 addi a4, a4, 1
lw s0, 16(sp) lw t6, (sp)
srai a4, a4, 1 // pixel width -> chroma width srai a4, a4, 1 // pixel width -> chroma width
1: 1:
mv t4, a4 mv t4, a4
@ -131,14 +128,12 @@ endfunc
add t2, t5, t2 add t2, t5, t2
bnez t4, 2b bnez t4, 2b
add a3, a3, s0 add a3, a3, t6
add a0, a0, a6 add a0, a0, a6
add a1, a1, a7 add a1, a1, a7
add a2, a2, a7 add a2, a2, a7
bnez a5, 1b bnez a5, 1b
ld s0, (sp)
addi sp, sp, 16
ret ret
.endm .endm
@ -149,4 +144,3 @@ endfunc
func ff_yuyvtoyuv422_rvv, zve32x func ff_yuyvtoyuv422_rvv, zve32x
yuy2_to_i422p 0 yuy2_to_i422p 0
endfunc endfunc
#endif

Loading…
Cancel
Save