lavc/bswapdsp: purge RISC-V V bswap32

This cannot beat the Zbb implementation, and it is unlikely that any meaningful real-world CPU design would support V but not Zbb. The best loop rewrite that I could come up with (4 shifts, 2 ands, 3 ors) is still ~40% slower than Zbb. A proper, faster vector implementation should be feasible with the cryptographic vector extensions, but that is a story for another time.
2 years ago · 61e5ca4ded
parent 5de1db5370
commit 61e5ca4ded
2 changed files with 1 addition and 27 deletions
--- a/libavcodec/riscv/bswapdsp_init.c
+++ b/libavcodec/riscv/bswapdsp_init.c
@@ -26,7 +26,6 @@
 #include "libavcodec/bswapdsp.h"

 void ff_bswap32_buf_rvb(uint32_t *dst, const uint32_t *src, int len);
-void ff_bswap32_buf_rvv(uint32_t *dst, const uint32_t *src, int len);
 void ff_bswap16_buf_rvv(uint16_t *dst, const uint16_t *src, int len);

 av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
@@ -39,10 +38,8 @@ av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
            c->bswap_buf = ff_bswap32_buf_rvb;
 #endif
 #if HAVE_RVV
-        if (flags & AV_CPU_FLAG_RVV_I32) {
-            c->bswap_buf = ff_bswap32_buf_rvv;
+        if (flags & AV_CPU_FLAG_RVV_I32)
            c->bswap16_buf = ff_bswap16_buf_rvv;
-        }
 #endif
    }
 }
--- a/libavcodec/riscv/bswapdsp_rvv.S
+++ b/libavcodec/riscv/bswapdsp_rvv.S
@@ -21,29 +21,6 @@
 #include "config.h"
 #include "libavutil/riscv/asm.S"

-func ff_bswap32_buf_rvv, zve32x
-        li      t4, 4
-        addi    t1, a0, 1
-        addi    t2, a0, 2
-        addi    t3, a0, 3
-1:
-        vsetvli    t0, a2, e8, m1, ta, ma
-        vlseg4e8.v v8, (a1)
-        sub        a2, a2, t0
-        sh2add     a1, t0, a1
-        vsse8.v    v8, (t3), t4
-        sh2add     t3, t0, t3
-        vsse8.v    v9, (t2), t4
-        sh2add     t2, t0, t2
-        vsse8.v    v10, (t1), t4
-        sh2add     t1, t0, t1
-        vsse8.v    v11, (a0), t4
-        sh2add     a0, t0, a0
-        bnez       a2, 1b
-
-        ret
-endfunc
-
 func ff_bswap16_buf_rvv, zve32x
 1:
        vsetvli t0, a2, e16, m8, ta, ma