lavc/vp8dsp: R-V V put_epel v

C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32: 13.7
vp8_put_epel16_v6_c: 135.7
vp8_put_epel16_v6_rvv_i32: 16.5

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
release/7.1
sunyuechi 10 months ago committed by Rémi Denis-Courmont
parent 109daea619
commit 6e77af1c22
  1. libavcodec/riscv/vp8dsp_init.c (7 changed lines)
  2. libavcodec/riscv/vp8dsp_rvv.S (34 changed lines)

@@ -90,6 +90,13 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv; c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv; c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv; c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
} }
#endif #endif
#endif #endif

@@ -161,9 +161,13 @@ const subpel_filters
.byte 0, -1, 12, 123, -6, 0 .byte 0, -1, 12, 123, -6, 0
endconst endconst
.macro epel_filter size .macro epel_filter size type
lla t2, subpel_filters lla t2, subpel_filters
.ifc \type,v
addi t0, a6, -1
.else
addi t0, a5, -1 addi t0, a5, -1
.endif
li t1, 6 li t1, 6
mul t0, t0, t1 mul t0, t0, t1
add t0, t0, t2 add t0, t0, t2
@@ -176,19 +180,25 @@ endconst
.endif .endif
.endm .endm
.macro epel_load dst len size .macro epel_load dst len size type
addi t6, a2, -1 .ifc \type,v
addi a7, a2, 1 mv a5, a3
.else
li a5, 1
.endif
sub t6, a2, a5
add a7, a2, a5
vle8.v v24, (a2) vle8.v v24, (a2)
vle8.v v22, (t6) vle8.v v22, (t6)
vle8.v v26, (a7) vle8.v v26, (a7)
addi a7, a7, 1 add a7, a7, a5
vle8.v v28, (a7) vle8.v v28, (a7)
vwmulu.vx v16, v24, t2 vwmulu.vx v16, v24, t2
vwmulu.vx v20, v26, t3 vwmulu.vx v20, v26, t3
.ifc \size,6 .ifc \size,6
addi t6, t6, -1 sub t6, t6, a5
addi a7, a7, 1 add a7, a7, a5
vle8.v v24, (t6) vle8.v v24, (t6)
vle8.v v26, (a7) vle8.v v26, (a7)
vwmaccu.vx v16, t0, v24 vwmaccu.vx v16, t0, v24
@ -206,18 +216,18 @@ endconst
vnclipu.wi \dst, v24, 0 vnclipu.wi \dst, v24, 0
.endm .endm
.macro epel_load_inc dst len size .macro epel_load_inc dst len size type
epel_load \dst \len \size epel_load \dst \len \size \type
add a2, a2, a3 add a2, a2, a3
.endm .endm
.macro epel len size type .macro epel len size type
func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
epel_filter \size epel_filter \size \type
vsetvlstatic8 \len vsetvlstatic8 \len
1: 1:
addi a4, a4, -1 addi a4, a4, -1
epel_load_inc v30 \len \size epel_load_inc v30 \len \size \type
vse8.v v30, (a0) vse8.v v30, (a0)
add a0, a0, a1 add a0, a0, a1
bnez a4, 1b bnez a4, 1b
@@ -232,4 +242,6 @@ put_vp8_bilin_h_v \len v a6
put_vp8_bilin_hv \len put_vp8_bilin_hv \len
epel \len 6 h epel \len 6 h
epel \len 4 h epel \len 4 h
epel \len 6 v
epel \len 4 v
.endr .endr

Loading…
Cancel
Save