From b6f37ffba71fa26b6176eb964cadcb442a115a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 10 Jun 2024 20:29:56 +0300 Subject: [PATCH] lavc/vc1dsp: match C block layout in inv_trans_4x8_rvv Although checkasm does not verify this, the decoder requires that the transform updates the input block exactly like the C code does. This fixes vc1-ism, vc1_ilaced_twomv, vc1_sa00040, vc1_sa10091, vc1_sa10143, vc1_sa20021, vc1test_smm0005 and wmv3-drm-dec tests. --- libavcodec/riscv/vc1dsp_rvv.S | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index c4517d54f5..860b0cc5b1 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -303,15 +303,24 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x vlsseg4e16.v v0, (a2), a3 li t1, 3 jal t0, ff_vc1_inv_trans_4_rvv + vssseg4e16.v v0, (a2), a3 + vsetivli zero, 4, e16, mf2, ta, ma addi t1, a2, 1 * 8 * 2 - vse16.v v0, (a2) + vle16.v v0, (a2) addi t2, a2, 2 * 8 * 2 - vse16.v v1, (t1) + vle16.v v1, (t1) addi t3, a2, 3 * 8 * 2 - vse16.v v2, (t2) - vse16.v v3, (t3) - vsetivli zero, 4, e16, mf2, ta, ma - vlseg8e16.v v0, (a2) + vle16.v v2, (t2) + addi t4, a2, 4 * 8 * 2 + vle16.v v3, (t3) + addi t5, a2, 5 * 8 * 2 + vle16.v v4, (t4) + addi t6, a2, 6 * 8 * 2 + vle16.v v5, (t5) + addi t1, a2, 7 * 8 * 2 + vle16.v v6, (t6) + vle16.v v7, (t1) + jal t0, ff_vc1_inv_trans_8_rvv vadd.vi v4, v4, 1 add t0, a1, a0