lavc/vp8dsp: R-V V put_epel hv

C908:
vp8_put_epel4_h4v4_c: 20.0
vp8_put_epel4_h4v4_rvv_i32: 11.0
vp8_put_epel4_h4v6_c: 25.2
vp8_put_epel4_h4v6_rvv_i32: 13.5
vp8_put_epel4_h6v4_c: 22.2
vp8_put_epel4_h6v4_rvv_i32: 14.5
vp8_put_epel4_h6v6_c: 29.0
vp8_put_epel4_h6v6_rvv_i32: 15.7
vp8_put_epel8_h4v4_c: 73.0
vp8_put_epel8_h4v4_rvv_i32: 22.2
vp8_put_epel8_h4v6_c: 90.5
vp8_put_epel8_h4v6_rvv_i32: 26.7
vp8_put_epel8_h6v4_c: 85.0
vp8_put_epel8_h6v4_rvv_i32: 27.2
vp8_put_epel8_h6v6_c: 104.7
vp8_put_epel8_h6v6_rvv_i32: 29.5
vp8_put_epel16_h4v4_c: 145.5
vp8_put_epel16_h4v4_rvv_i32: 26.5
vp8_put_epel16_h4v6_c: 190.7
vp8_put_epel16_h4v6_rvv_i32: 47.5
vp8_put_epel16_h6v4_c: 173.7
vp8_put_epel16_h6v4_rvv_i32: 33.2
vp8_put_epel16_h6v6_c: 222.2
vp8_put_epel16_h6v6_rvv_i32: 35.5

Amended to disable unsupported RV128.

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
release/7.1
sunyuechi 8 months ago committed by Rémi Denis-Courmont
parent 0b2316e37f
commit 63697d3350
  1. 13
      libavcodec/riscv/vp8dsp_init.c
  2. 123
      libavcodec/riscv/vp8dsp_rvv.S

@ -97,6 +97,19 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv; c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv; c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv; c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
c->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_rvv;
c->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_rvv;
c->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_rvv;
c->put_vp8_epel_pixels_tab[0][2][1] = ff_put_vp8_epel16_h4v6_rvv;
c->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_rvv;
c->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_rvv;
c->put_vp8_epel_pixels_tab[0][1][1] = ff_put_vp8_epel16_h4v4_rvv;
c->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_rvv;
c->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_rvv;
c->put_vp8_epel_pixels_tab[0][1][2] = ff_put_vp8_epel16_h6v4_rvv;
c->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_rvv;
c->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_rvv;
} }
#endif #endif
#endif #endif

@ -161,26 +161,26 @@ const subpel_filters
.byte 0, -1, 12, 123, -6, 0 .byte 0, -1, 12, 123, -6, 0
endconst endconst
/*
 * epel_filter: load one row of subpel_filters into scalar registers.
 *
 * This is a side-by-side diff: on every line the left half is the previous
 * version and the right half is the new one.  The previous version always
 * used t0-t5; the new version takes the register bank letter in \regtype
 * and uses \regtype0-\regtype5 instead.
 *
 *   size  4 or 6; for 6 the two outer taps (indices 0 and 5) are loaded too
 *   type  v selects the row with a6 - 1, anything else with a5 - 1
 */
.macro epel_filter size type .macro epel_filter size type regtype
lla t2, subpel_filters lla \regtype\()2, subpel_filters
.ifc \type,v .ifc \type,v
addi t0, a6, -1 addi \regtype\()0, a6, -1
.else .else
addi t0, a5, -1 addi \regtype\()0, a5, -1
.endif .endif
/* Row address = subpel_filters + index * 6 (each table row above is 6 bytes). */
li t1, 6 li \regtype\()1, 6
mul t0, t0, t1 mul \regtype\()0, \regtype\()0, \regtype\()1
add t0, t0, t2 add \regtype\()0, \regtype\()0, \regtype\()2
/* Taps 1-4 are always loaded; register 0 still holds the row address here. */
.irp n,1,2,3,4 .irp n,1,2,3,4
lb t\n, \n(t0) lb \regtype\n, \n(\regtype\()0)
.endr .endr
.ifc \size,6 .ifc \size,6
/* Tap 0 is loaded last because it overwrites the row address in register 0. */
lb t5, 5(t0) lb \regtype\()5, 5(\regtype\()0)
lb t0, (t0) lb \regtype\()0, (\regtype\()0)
.endif .endif
.endm .endm
.macro epel_load dst len size type .macro epel_load dst len size type from_mem regtype
.ifc \type,v .ifc \type,v
mv a5, a3 mv a5, a3
.else .else
@ -189,24 +189,35 @@ endconst
sub t6, a2, a5 sub t6, a2, a5
add a7, a2, a5 add a7, a2, a5
.if \from_mem
vle8.v v24, (a2) vle8.v v24, (a2)
vle8.v v22, (t6) vle8.v v22, (t6)
vle8.v v26, (a7) vle8.v v26, (a7)
add a7, a7, a5 add a7, a7, a5
vle8.v v28, (a7) vle8.v v28, (a7)
vwmulu.vx v16, v24, t2 vwmulu.vx v16, v24, \regtype\()2
vwmulu.vx v20, v26, t3 vwmulu.vx v20, v26, \regtype\()3
.ifc \size,6 .ifc \size,6
sub t6, t6, a5 sub t6, t6, a5
add a7, a7, a5 add a7, a7, a5
vle8.v v24, (t6) vle8.v v24, (t6)
vle8.v v26, (a7) vle8.v v26, (a7)
vwmaccu.vx v16, t0, v24 vwmaccu.vx v16, \regtype\()0, v24
vwmaccu.vx v16, t5, v26 vwmaccu.vx v16, \regtype\()5, v26
.endif
vwmaccsu.vx v16, \regtype\()1, v22
vwmaccsu.vx v16, \regtype\()4, v28
.else
vwmulu.vx v16, v4, \regtype\()2
vwmulu.vx v20, v6, \regtype\()3
.ifc \size,6
vwmaccu.vx v16, \regtype\()0, v0
vwmaccu.vx v16, \regtype\()5, v10
.endif
vwmaccsu.vx v16, \regtype\()1, v2
vwmaccsu.vx v16, \regtype\()4, v8
.endif .endif
li t6, 64 li t6, 64
vwmaccsu.vx v16, t1, v22
vwmaccsu.vx v16, t4, v28
vwadd.wx v16, v16, t6 vwadd.wx v16, v16, t6
vsetvlstatic16 \len vsetvlstatic16 \len
vwadd.vv v24, v16, v20 vwadd.vv v24, v16, v20
@ -216,18 +227,18 @@ endconst
vnclipu.wi \dst, v24, 0 vnclipu.wi \dst, v24, 0
.endm .endm
/*
 * epel_load_inc: run epel_load with the same arguments, then advance a2 by a3.
 *
 * Side-by-side diff: left half is the previous version, right half the new
 * one, which additionally forwards \from_mem and \regtype to epel_load.
 */
.macro epel_load_inc dst len size type .macro epel_load_inc dst len size type from_mem regtype
epel_load \dst \len \size \type epel_load \dst \len \size \type \from_mem \regtype
add a2, a2, a3 add a2, a2, a3
.endm .endm
.macro epel len size type .macro epel len size type
func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
epel_filter \size \type epel_filter \size \type t
vsetvlstatic8 \len vsetvlstatic8 \len
1: 1:
addi a4, a4, -1 addi a4, a4, -1
epel_load_inc v30 \len \size \type epel_load_inc v30 \len \size \type 1 t
vse8.v v30, (a0) vse8.v v30, (a0)
add a0, a0, a1 add a0, a0, a1
bnez a4, 1b bnez a4, 1b
@ -236,6 +247,72 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
endfunc endfunc
.endm .endm
/*
 * epel_hv len hsize vsize
 *
 * Emits ff_put_vp8_epel<len>_h<hsize>v<vsize>_rvv, which filters each source
 * row horizontally (epel_load ... h, from memory) and then filters the
 * resulting rows vertically (epel_load ... v, from vector registers).
 *
 * Register roles, as used in this macro and the helpers it expands:
 *   a0 / a1  destination pointer / amount added to it after each row
 *   a2 / a3  source pointer / amount added to it after each row
 *   a4       number of output rows; must be at least 2, because it is
 *            decremented once before the loop and once more before the
 *            first loop test
 *   a5 / a6  filter selectors read by epel_filter (h / v)
 *   t0-t5    horizontal taps, live for the whole function
 *   s0-s5    vertical taps; saved on entry and restored before ret
 *   v0..v10  (even registers) sliding window of horizontally filtered rows
 *   v30      output row
 *
 * NOTE(review): when hsize is 6 and vsize is 4 the window still carries v0
 * and v10, although the 4-tap vertical epel_load only reads v2, v4, v6 and
 * v8.  That variant therefore filters two extra rows during setup and reads
 * one source row further above and below than its vertical filter consumes.
 * Confirm that this is intended before relying on tight source margins.
 */
.macro epel_hv len hsize vsize
func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x
        /* Spill s0-s5: they are overwritten with the vertical taps below. */
#if __riscv_xlen == 64
        addi            sp, sp, -48
        .irp n,0,1,2,3,4,5
        sd              s\n, \n\()<<3(sp)
        .endr
#else
        /*
         * Only 6 * 4 = 24 bytes are stored, but the frame is rounded up to
         * 32 bytes so that sp remains 16-byte aligned while it is lowered.
         */
        addi            sp, sp, -32
        .irp n,0,1,2,3,4,5
        sw              s\n, \n\()<<2(sp)
        .endr
#endif
        /* Step back one source row: the vertical filter needs the row above. */
        sub             a2, a2, a3
        epel_filter     \hsize h t
        epel_filter     \vsize v s
        vsetvlstatic8   \len

        /* Prime the window with horizontally filtered rows. */
.if \hsize == 6 || \vsize == 6
        /* One further row above, held in v0. */
        sub             a2, a2, a3
        epel_load_inc   v0 \len \hsize h 1 t
.endif
        epel_load_inc   v2 \len \hsize h 1 t
        epel_load_inc   v4 \len \hsize h 1 t
        epel_load_inc   v6 \len \hsize h 1 t
        epel_load_inc   v8 \len \hsize h 1 t
.if \hsize == 6 || \vsize == 6
        epel_load_inc   v10 \len \hsize h 1 t
.endif

        /* The last row is produced after the loop, so loop one time fewer. */
        addi            a4, a4, -1
1:
        addi            a4, a4, -1
        /* Vertical pass over the current window, then store the row. */
        epel_load       v30 \len \vsize v 0 s
        vse8.v          v30, (a0)

        /* Slide the window down by one row and filter the next source row. */
.if \hsize == 6 || \vsize == 6
        vmv.v.v         v0, v2
.endif
        vmv.v.v         v2, v4
        vmv.v.v         v4, v6
        vmv.v.v         v6, v8
.if \hsize == 6 || \vsize == 6
        vmv.v.v         v8, v10
        epel_load_inc   v10 \len \hsize h 1 t
.else
        /* This branch is only assembled when hsize is 4. */
        epel_load_inc   v8 \len 4 h 1 t
.endif
        add             a0, a0, a1
        bnez            a4, 1b

        /* Final row: the window is already complete, no further source load. */
        epel_load       v30 \len \vsize v 0 s
        vse8.v          v30, (a0)

        /* Restore s0-s5 and release the frame. */
#if __riscv_xlen == 64
        .irp n,0,1,2,3,4,5
        ld              s\n, \n\()<<3(sp)
        .endr
        addi            sp, sp, 48
#else
        .irp n,0,1,2,3,4,5
        lw              s\n, \n\()<<2(sp)
        .endr
        addi            sp, sp, 32
#endif
        ret
endfunc
.endm
.irp len,16,8,4 .irp len,16,8,4
put_vp8_bilin_h_v \len h a5 put_vp8_bilin_h_v \len h a5
put_vp8_bilin_h_v \len v a6 put_vp8_bilin_h_v \len v a6
@ -244,4 +321,8 @@ epel \len 6 h
epel \len 4 h epel \len 4 h
epel \len 6 v epel \len 6 v
epel \len 4 v epel \len 4 v
epel_hv \len 6 6
epel_hv \len 4 4
epel_hv \len 6 4
epel_hv \len 4 6
.endr .endr

Loading…
Cancel
Save