From 7e1cdc69fbe5cc82203b6a772e14f6e5f88b4b7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sat, 28 Oct 2023 15:29:32 +0300
Subject: [PATCH] lavc/utvideodsp: R-V V restore_rgb_planes10

restore_rgb_planes10_c:       185852.2
restore_rgb_planes10_rvv_i32:  90130.5
---
 libavcodec/riscv/utvideodsp_init.c |  9 +++++++-
 libavcodec/riscv/utvideodsp_rvv.S  | 35 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/utvideodsp_init.c b/libavcodec/riscv/utvideodsp_init.c
index dfaa16692a..f5038c4736 100644
--- a/libavcodec/riscv/utvideodsp_init.c
+++ b/libavcodec/riscv/utvideodsp_init.c
@@ -26,13 +26,20 @@
 void ff_restore_rgb_planes_rvv(uint8_t *r, uint8_t *g, uint8_t *b,
                                ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_rvv(uint16_t *r, uint16_t *g, uint16_t *b, /* defined in utvideodsp_rvv.S */
+                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g, /* used as 16-bit element counts by the asm */
+                                 ptrdiff_t linesize_b, int width, int height);
 
 av_cold void ff_utvideodsp_init_riscv(UTVideoDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RVV_I32)
+    if (flags & AV_CPU_FLAG_RVV_I32) {
         c->restore_rgb_planes = ff_restore_rgb_planes_rvv;
+
+        if (flags & AV_CPU_FLAG_RVB_ADDR) /* 10-bit routine uses sh1add; NOTE(review): presumably what RVB_ADDR gates - confirm */
+            c->restore_rgb_planes10 = ff_restore_rgb_planes10_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index 673e3442ce..fa70d0eb34 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -51,3 +51,38 @@ func ff_restore_rgb_planes_rvv, zve32x
 
         ret
 endfunc
+/* In place, per 16-bit sample: r = (r + g - 0x200) & 0x3FF, b = (b + g - 0x200) & 0x3FF; g is only read. */
+func ff_restore_rgb_planes10_rvv, zve32x
+        li      t1, -0x200 /* bias added to every G sample, i.e. subtracts 0x200 */
+        li      t2, 0x3FF /* mask keeping the low 10 bits of each result */
+        sub     a3, a3, a6 /* a3 = linesize_r - width (RISC-V ABI: a0-a2 = r, g, b; a3-a5 = linesizes; a6 = width; a7 = height) */
+        sub     a4, a4, a6 /* a4 = linesize_g - width */
+        sub     a5, a5, a6 /* a5 = linesize_b - width */
+1: /* row loop */
+        mv      t6, a6 /* t6 = elements still to process in this row */
+        addi    a7, a7, -1 /* one row fewer remaining */
+2: /* chunk loop within a row */
+        vsetvli t0, t6, e16, m8, ta, ma /* t0 = 16-bit elements handled this pass, 8-register groups */
+        vle16.v v16, (a1) /* load G */
+        sub     t6, t6, t0 /* consume t0 elements of the row */
+        vle16.v v8, (a0) /* load R */
+        vadd.vx v16, v16, t1 /* v16 = G - 0x200 */
+        sh1add  a1, t0, a1 /* g += t0 elements (t0 * 2 bytes) */
+        vle16.v v24, (a2) /* load B */
+        vadd.vv v8, v8, v16 /* R + G - 0x200 */
+        vadd.vv v24, v24, v16 /* B + G - 0x200 */
+        vand.vx v8, v8, t2 /* keep low 10 bits of R result */
+        vand.vx v24, v24, t2 /* keep low 10 bits of B result */
+        vse16.v v8, (a0) /* store R back in place */
+        sh1add  a0, t0, a0 /* r += t0 elements */
+        vse16.v v24, (a2) /* store B back in place */
+        sh1add  a2, t0, a2 /* b += t0 elements */
+        bnez    t6, 2b /* more elements left in this row */
+
+        sh1add  a0, a3, a0 /* skip to next row: r += (linesize_r - width) elements */
+        sh1add  a1, a4, a1 /* g += (linesize_g - width) elements */
+        sh1add  a2, a5, a2 /* b += (linesize_b - width) elements */
+        bnez    a7, 1b /* more rows left; the count is decremented before this test, so height >= 1 is assumed */
+
+        ret
+endfunc