lavc/alacdsp: unroll RISC-V V loops

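This increases the vector register group multiplier (LMUL) from 1 to the
best-performing value for each function (4 for decorrelate_stereo, 8 for both
append_extra_bits functions), as per the following T-Head C910 benchmarks: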

alac_append_extra_bits_mono_c: 803.0
alac_append_extra_bits_stereo_c: 1604.2
alac_decorrelate_stereo_c: 1077.5

LMUL=1
alac_append_extra_bits_mono_rvv_i32: 418.2
alac_append_extra_bits_stereo_rvv_i32: 693.2
alac_decorrelate_stereo_rvv_i32: 673.5

LMUL=2
alac_append_extra_bits_mono_rvv_i32: 382.2
alac_append_extra_bits_stereo_rvv_i32: 648.2
alac_decorrelate_stereo_rvv_i32: 542.7

LMUL=4
alac_append_extra_bits_mono_rvv_i32: 241.5
alac_append_extra_bits_stereo_rvv_i32: 512.7
alac_decorrelate_stereo_rvv_i32: 364.2

LMUL=8
alac_append_extra_bits_mono_rvv_i32: 239.7
alac_append_extra_bits_stereo_rvv_i32: 497.2
alac_decorrelate_stereo_rvv_i32: 426.7
pull/389/head
Rémi Denis-Courmont 2 years ago
parent a28aa0475d
commit c541ecf0dc
  1. 6
      libavcodec/riscv/alacdsp_rvv.S

@@ -25,7 +25,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x
ld a4, 8(a0)
ld a0, 0(a0)
1:
-vsetvli t0, a1, e32, m1, ta, ma
+vsetvli t0, a1, e32, m4, ta, ma
vle32.v v24, (a4)
sub a1, a1, t0
vle32.v v16, (a0)
@@ -47,7 +47,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x
ld a0, (a0)
ld a1, (a1)
1:
-vsetvli t0, a4, e32, m1, ta, ma
+vsetvli t0, a4, e32, m8, ta, ma
vle32.v v16, (a0)
sub a4, a4, t0
vle32.v v24, (a1)
@@ -67,7 +67,7 @@ func ff_alac_append_extra_bits_stereo_rvv, zve32x
ld a7, 8(a1)
ld a1, (a1)
1:
-vsetvli t0, a4, e32, m1, ta, ma
+vsetvli t0, a4, e32, m8, ta, ma
vle32.v v16, (a0)
sub a4, a4, t0
vle32.v v0, (a6)

Loading…
Cancel
Save