diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S index 2766f7c41e..2e8259b24f 100644 --- a/libavcodec/riscv/vp8dsp_rvv.S +++ b/libavcodec/riscv/vp8dsp_rvv.S @@ -275,11 +275,35 @@ func ff_put_vp8_bilin4_\type\()_rvv, zve32x li t4, 4 sub t1, t1, \mn 1: - addi a4, a4, -1 - bilin_load v0, \type, \mn - vse8.v v0, (a0) - add a2, a2, a3 - add a0, a0, a1 + add t0, a2, a3 + add t2, a0, a1 + addi a4, a4, -2 +.ifc \type,v + add t3, t0, a3 +.else + addi t5, a2, 1 + addi t3, t0, 1 + vle8.v v2, (t5) +.endif + vle8.v v0, (a2) + vle8.v v4, (t0) + vle8.v v6, (t3) + vwmulu.vx v28, v0, t1 + vwmulu.vx v26, v4, t1 +.ifc \type,v + vwmaccu.vx v28, \mn, v4 +.else + vwmaccu.vx v28, \mn, v2 +.endif + vwmaccu.vx v26, \mn, v6 + vwaddu.wx v24, v28, t4 + vwaddu.wx v22, v26, t4 + vnsra.wi v30, v24, 3 + vnsra.wi v0, v22, 3 + vse8.v v30, (a0) + vse8.v v0, (t2) + add a2, t0, a3 + add a0, t2, a1 bnez a4, 1b ret