From 78e1565f847588cb3d34d1a920902fb253d9c03c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sun, 30 Jun 2024 14:15:33 +0300
Subject: [PATCH] lavc/vc1dsp: fuse multiply-adds in R-V V inv_trans_4

T-Head C908 (cycles):             before  after
vc1dsp.vc1_inv_trans_4x4_rvv_i32:  128.0  120.0
vc1dsp.vc1_inv_trans_4x8_rvv_i32:  244.0  240.0
vc1dsp.vc1_inv_trans_8x4_rvv_i32:  239.2  235.2
---
 libavcodec/riscv/vc1dsp_rvv.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index 9d85377cec..8c127c7644 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -194,14 +194,12 @@ func ff_vc1_inv_trans_4_rvv, zve32x
 li t4, 22
 vmul.vx v10, v2, t3
 li t2, 10
-vmul.vx v14, v1, t4
+vmul.vx v26, v1, t4
+vmul.vx v27, v3, t4
 vadd.vv v24, v8, v10 # t1
 vsub.vv v25, v8, v10 # t2
-vmul.vx v16, v3, t2
-vmul.vx v18, v3, t4
-vmul.vx v20, v1, t2
-vadd.vv v26, v14, v16 # t3
-vsub.vv v27, v18, v20 # t4
+vmacc.vx v26, t2, v3 # t3
+vnmsac.vx v27, t2, v1 # t4
 vwadd.vv v8, v24, v26
 vwsub.vv v10, v25, v27
 vwadd.vv v12, v25, v27