From 88d973a5d658dc61dfd32e87b062724def46addc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sun, 12 May 2024 20:02:03 +0300
Subject: [PATCH] lavc/flacdsp: R-V V flac_wasted33

T-Head C908:
flac_wasted_33_c:       786.2
flac_wasted_33_rvv_i64: 486.5
---
 libavcodec/riscv/flacdsp_init.c |  4 ++++
 libavcodec/riscv/flacdsp_rvv.S  | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 454787470b..4f1652dbe7 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -32,6 +32,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
 void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
                               int pred_order, int qlevel, int len);
 void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
+void ff_flac_wasted33_rvv(int64_t *, const int32_t *, int shift, int len);
 void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
                                        int channels, int len, int shift);
 void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
@@ -84,6 +85,9 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
 
         c->wasted32 = ff_flac_wasted32_rvv;
 
+        if (flags & AV_CPU_FLAG_RVV_I64) // the routine is built for zve64x (64-bit vector elements)
+            c->wasted33 = ff_flac_wasted33_rvv;
+
 # if (__riscv_xlen >= 64)
         switch (fmt) {
         case AV_SAMPLE_FMT_S16:
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index d7009cdec2..6287faa260 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -115,6 +115,38 @@ func ff_flac_wasted32_rvv, zve32x
         ret
 endfunc
 
+func ff_flac_wasted33_rvv, zve64x // a0: int64_t *dst, a1: const int32_t *src, a2: shift, a3: len
+        srli    t0, a2, 5 // t0 = shift >> 5: non-zero when shift >= 32 (for a non-negative shift)
+        li      t1, 1
+        bnez    t0, 2f // 1 << shift would not fit the 32-bit multiplier: use the path at 2:
+        sll     a2, t1, a2 // a2 = 1 << shift, the multiplier used in the loop below
+1: // Fast path: shift < 32
+        vsetvli t0, a3, e32, m4, ta, ma // t0 = number of 32-bit elements handled in this pass
+        vle32.v v8, (a1) // load 32-bit input samples
+        sub     a3, a3, t0 // len -= t0
+        vwmulsu.vx v16, v8, a2 // 64-bit result = signed sample * unsigned (1 << shift)
+        sh2add  a1, t0, a1 // src += t0 elements (4 bytes each)
+        vse64.v v16, (a0) // store the 64-bit results
+        sh3add  a0, t0, a0 // dst += t0 elements (8 bytes each)
+        bnez    a3, 1b // loop while elements remain
+        ret
+
+2:      // Pessimistic case: wasted >= 32
+        vsetvli t0, a3, e32, m4, ta, ma // t0 = number of 32-bit elements handled in this pass
+        vle32.v v8, (a1) // load 32-bit input samples
+        sub     a3, a3, t0 // len -= t0
+        vwcvtu.x.x.v v16, v8 // zero-extend to 64 bits; for 32 <= shift < 64 the upper half is shifted out below
+        sh2add  a1, t0, a1 // src += t0 elements (4 bytes each)
+        vsetvli zero, zero, e64, m8, ta, ma // keep the same vl, now operating on 64-bit elements
+        vsll.vx v16, v16, a2 // v16 <<= shift
+        vse64.v v16, (a0) // store the 64-bit results
+        sh3add  a0, t0, a0 // dst += t0 elements (8 bytes each)
+        bnez    a3, 2b // loop while elements remain
+        ret
+endfunc
+
 #if (__riscv_xlen == 64)
 func ff_flac_decorrelate_indep2_16_rvv, zve32x
         ld      a0, (a0)