lavc/vvc_mc R-V V sad

k230               banana_f3
sad_8x16_c:                 387.7 ( 1.00x)    394.9 ( 1.00x)
sad_8x16_rvv_i32:           109.7 ( 3.53x)    103.5 ( 3.82x)
sad_16x8_c:                 378.2 ( 1.00x)    384.7 ( 1.00x)
sad_16x8_rvv_i32:            82.0 ( 4.61x)    61.7 ( 6.24x)
sad_16x16_c:                748.7 ( 1.00x)    759.7 ( 1.00x)
sad_16x16_rvv_i32:          128.5 ( 5.83x)    113.7 ( 6.68x)
pull/391/head
sunyuechi 1 month ago committed by Nuo Mi
parent b3f7440298
commit 16d4945e9a
  1. 3
      libavcodec/riscv/vvc/Makefile
  2. 61
      libavcodec/riscv/vvc/vvc_sad_rvv.S
  3. 7
      libavcodec/riscv/vvc/vvcdsp_init.c

@ -1,2 +1,3 @@
OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
riscv/vvc/vvc_sad_rvv.o

@ -0,0 +1,61 @@
/*
* Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/riscv/h26x/asm.S"
/*
 * func_sad vlen
 *
 * Emits ff_vvc_sad_rvv_<vlen>: sum of absolute differences between two
 * blocks of 16-bit samples, returned in a0.
 *
 * Register use on entry (matches the C prototype
 *   int ff_vvc_sad_rvv_<vlen>(const int16_t *src0, const int16_t *src1,
 *                             int dx, int dy, int block_w, int block_h)
 * under the standard RISC-V integer calling convention):
 *   a0 = src0, a1 = src1, a2 = dx, a3 = dy, a4 = block_w, a5 = block_h
 *
 * Scratch: t1, t2, t3, v0 (accumulator group), v8, v16, v24.
 *
 * Both buffers are addressed with a fixed row pitch of 128 elements
 * (256 bytes). Only every second row is read: each loop iteration loads
 * one row from each source, then advances both pointers by two rows and
 * decrements the row counter by 2.
 *
 * block_w == 16 selects the 16-element variant; every other value falls
 * through to the 8-element variant. The loop exits only when a5 reaches
 * exactly zero, so block_h has to be a positive even number.
 *
 * vsetvlstatic16 / vsetvlstatic32, func, endfunc and lpad are not defined
 * in this file (presumably they come from the included asm.S); the notes
 * below assume vsetvlstatic16/32 configure vl = \w at SEW = 16 / 32 for the
 * given vlen - TODO confirm against that header.
 */
.macro func_sad vlen
func ff_vvc_sad_rvv_\vlen, zve32x, zbb, zba
// NOTE(review): looks like a landing pad for indirect-call CFI - confirm.
lpad 0
slli t2, a3, 7 // t2 = dy * 128 (row pitch in elements)
// t1 starts as the constant part of the src1 offset: 4 rows + 4 columns.
li t1, 4*128+4
add t3, t2, a2 // t3 = dy * 128 + dx        (src0 offset, in elements)
// t1 = (4 - dy) * 128 + (4 - dx)           (src1 offset, in elements)
sub t1, t1, t2
sub t1, t1, a2
// sh1add scales the element offsets by 2 bytes and adds them to the bases.
sh1add a0, t3, a0
sh1add a1, t1, a1
// Width dispatch: jump to the second .irp expansion when block_w == 16,
// otherwise fall straight into the first one (w = 8).
li t3, 16
beq a4, t3, SADVSET\vlen\()16
// The body below is emitted twice, once with w = 8 and once with w = 16.
// Each copy ends in ret, so the w = 8 copy never runs into the w = 16 copy.
.irp w,8,16
SADVSET\vlen\w:
// Clear the widened per-lane accumulator (v0) and element 0 of v24, which
// later seeds the reduction.
vsetvlstatic32 \w, \vlen
vmv.v.i v0, 0
vmv.s.x v24, zero
// Switch to the 16-bit element configuration used inside the row loop.
vsetvlstatic16 \w, \vlen
SAD\vlen\w:
addi a5, a5, -2
vle16.v v8, (a0)
vle16.v v16, (a1)
// |src0 - src1| computed as max(d, -d) with a signed max.
// NOTE(review): d is formed at the loaded element width, so a difference
// that does not fit in a signed 16-bit value would wrap - confirm the
// input range guarantees this cannot happen.
vsub.vv v8, v8, v16
vneg.v v16, v8
// Pointer bumps (2 rows * 128 elements * 2 bytes) are interleaved with the
// vector instructions.
addi a0, a0, 2 * 128 * 2
vmax.vv v8, v8, v16
// Widening unsigned add: v0 (2*SEW lanes) += zero-extended v8 (SEW lanes).
vwaddu.wv v0, v0, v8
addi a1, a1, 2 * 128 * 2
bnez a5, SAD\vlen\w
// Back to the 32-bit configuration to fold the lanes of v0 into a scalar:
// v24[0] = v24[0] + sum(v0[0 .. vl-1]), then move it to the return register.
vsetvlstatic32 \w, \vlen
vredsum.vs v24, v0, v24
vmv.x.s a0, v24
ret
.endr
endfunc
.endm
// Instantiate one entry point per supported vlen argument.
func_sad 256
func_sad 128

@ -59,6 +59,9 @@ DMVR_PROTOTYPES(8, rvv_256)
c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_##opt; \
} while (0)
/* RVV sum-of-absolute-differences kernels generated by the func_sad macro in
 * vvc_sad_rvv.S; the numeric suffix is the vlen argument the macro was
 * instantiated with. */
int ff_vvc_sad_rvv_128(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
int ff_vvc_sad_rvv_256(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
#define PUT_PIXELS_PROTOTYPES2(bd, opt) \
void bf(ff_vvc_put_pixels, bd, opt)(int16_t *dst, \
const uint8_t *_src, const ptrdiff_t _src_stride, \
@ -97,6 +100,8 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
FUNCS(LUMA, rvv_256);
FUNCS(CHROMA, rvv_256);
break;
case 10:
c->inter.sad = ff_vvc_sad_rvv_256;
default:
break;
}
@ -111,6 +116,8 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
FUNCS(LUMA, rvv_128);
FUNCS(CHROMA, rvv_128);
break;
case 10:
c->inter.sad = ff_vvc_sad_rvv_128;
default:
break;
}

Loading…
Cancel
Save