mirror of https://github.com/FFmpeg/FFmpeg.git
This works out a bit more favourably than VP8's due to:
- additional multiplications that can be vectored,
- hardware-supported fixed-point rounding mode.

vp7_luma_dc_wht_c: 3.2
vp7_luma_dc_wht_rvv_i64: 2.0

release/7.1
parent
91b5ea7bb9
commit
fd39997f72
5 changed files with 144 additions and 0 deletions
@ -0,0 +1,41 @@ |
||||
/*
|
||||
* Copyright (c) 2024 Rémi Denis-Courmont. |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/riscv/cpu.h" |
||||
#include "libavcodec/vp8dsp.h" |
||||
|
||||
/*
 * VP7 luma DC transform, implemented in RISC-V Vector assembly.
 * Reads the 16 coefficients in dc, clears all 16 entries of dc, and writes
 * one 16-bit result to element [0] of each of the 16 block[k][i] rows.
 */
void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]); |
||||
|
||||
/**
 * Install the RISC-V optimised VP7 DSP routines into the given context.
 *
 * The context is left untouched unless the build has HAVE_RVV set, the
 * CPU flags include AV_CPU_FLAG_RVV_I32 and ff_rv_vlen_least(128) returns
 * non-zero. The luma DC transform override is additionally compiled in
 * only when __riscv_xlen is at least 64.
 */
av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* Bail out early when the required vector support is missing. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32) || !ff_rv_vlen_least(128))
        return;

#if __riscv_xlen >= 64
    c->vp8_luma_dc_wht = ff_vp7_luma_dc_wht_rvv;
#endif
#endif
}
@ -0,0 +1,95 @@ |
||||
/* |
||||
* Copyright (c) 2024 Rémi Denis-Courmont. |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/riscv/asm.S" |
||||
|
||||
/*
 * ff_vp7_luma_dc_wht_rvv(block, dc)
 *   a0 = block: 16-bit output; one value is stored every 32 bytes
 *   a1 = dc:    16 input coefficients (16-bit each), zeroed before returning
 *
 * Two identical butterfly passes over a 4x4 matrix of 16-bit values, with a
 * transpose through a 32-byte stack scratch area in between. With inputs
 * x0..x3 per lane, each pass computes, in 32-bit precision:
 *   a = (x0 + x2) * 23170        b = (x0 - x2) * 23170
 *   c = x1 * 12540 - x3 * 30274  d = x1 * 30274 + x3 * 12540
 *   out0 = a + d   out1 = b + c   out2 = b - c   out3 = a - d
 * The first pass narrows with a plain arithmetic shift by 14; the second
 * narrows with a rounding, saturating shift by 18.
 *
 * NOTE(review): the constants are numerically close to 2^15 / sqrt(2),
 * 2^15 * sin(pi/8) and 2^15 * cos(pi/8) -- confirm against the C reference.
 * NOTE(review): the RV64 guard is presumably needed because dc is cleared
 * with 64-bit sd stores -- confirm.
 */
#if __riscv_xlen >= 64 |
||||
func ff_vp7_luma_dc_wht_rvv, zve32x |
||||
/* Fixed-point rounding mode 0 (round-to-nearest-up); used by vnclip below. */
csrwi vxrm, 0 |
||||
li t4, 12540 |
||||
/* Work on 4 elements of 16 bits each. */
vsetivli zero, 4, e16, mf2, ta, ma |
||||
/* De-interleaving load: vN[i] = dc[4 * i + N] for N = 0..3. */
vlseg4e16.v v0, (a1) |
||||
li t6, 30274 |
||||
/* Widening (16 -> 32 bit) products feeding the c and d terms. */
vwmul.vx v8, v1, t4 |
||||
li t5, 23170 |
||||
vwmul.vx v9, v3, t6 |
||||
/* t1..t3 are taken relative to the old sp: once sp is lowered by 32 bytes */
/* below, they point 8, 16 and 24 bytes above the new sp. */
addi t1, sp, -12 * 2 |
||||
vwmul.vx v10, v1, t6 |
||||
addi t2, sp, -8 * 2 |
||||
vwmul.vx v11, v3, t4 |
||||
addi t3, sp, -4 * 2 |
||||
/* Widening sum and difference of inputs 0 and 2. */
vwadd.vv v4, v0, v2 |
||||
/* Reserve 32 bytes of stack as scratch for the transpose. */
addi sp, sp, -16 * 2 |
||||
vwsub.vv v5, v0, v2 |
||||
/* Switch to 32-bit elements for the butterfly arithmetic. */
vsetvli zero, zero, e32, m1, ta, ma |
||||
/* v7 = d, v4 = a, v6 = c, v5 = b. */
vadd.vv v7, v10, v11 |
||||
vmul.vx v4, v4, t5 |
||||
vsub.vv v6, v8, v9 |
||||
vmul.vx v5, v5, t5 |
||||
/* v0 = a + d, v3 = a - d, v1 = b + c, v2 = b - c. */
vadd.vv v0, v4, v7 |
||||
vsub.vv v3, v4, v7 |
||||
vadd.vv v1, v5, v6 |
||||
vsub.vv v2, v5, v6 |
||||
/* Back to 16-bit elements; narrow with an arithmetic right shift by 14. */
vsetvli zero, zero, e16, mf2, ta, ma |
||||
vnsra.wi v4, v0, 14 |
||||
vnsra.wi v7, v3, 14 |
||||
vnsra.wi v5, v1, 14 |
||||
vnsra.wi v6, v2, 14 |
||||
/* Transpose: interleaved store of v4..v7 into the scratch area, then four */
/* unit-stride loads of 4 elements each at offsets 0, 8, 16 and 24. */
vsseg4e16.v v4, (sp) |
||||
vle16.v v0, (sp) |
||||
vle16.v v1, (t1) |
||||
vle16.v v2, (t2) |
||||
vle16.v v3, (t3) |
||||
/* Second pass: same products as the first pass, on the transposed data. */
vwmul.vx v8, v1, t4 |
||||
/* t0 = byte stride between consecutive output elements (16 int16_t). */
li t0, 16 * 2 |
||||
vwmul.vx v9, v3, t6 |
||||
/* t1..t3 = output bases 128, 256 and 384 bytes past a0. */
addi t1, a0, 1 * 4 * 16 * 2 |
||||
vwmul.vx v10, v1, t6 |
||||
addi t2, a0, 2 * 4 * 16 * 2 |
||||
vwmul.vx v11, v3, t4 |
||||
addi t3, a0, 3 * 4 * 16 * 2 |
||||
vwadd.vv v4, v0, v2 |
||||
vwsub.vv v5, v0, v2 |
||||
vsetvli zero, zero, e32, m1, ta, ma |
||||
vmul.vx v4, v4, t5 |
||||
/* Clear the 32 bytes at a1 (all 16 input coefficients), 8 bytes per store, */
/* interleaved with the vector arithmetic. The input was loaded earlier. */
sd zero, (a1) |
||||
vadd.vv v7, v10, v11 |
||||
sd zero, 8(a1) |
||||
vmul.vx v5, v5, t5 |
||||
sd zero, 16(a1) |
||||
vsub.vv v6, v8, v9 |
||||
sd zero, 24(a1) |
||||
vadd.vv v0, v4, v7 |
||||
/* Release the scratch area; it has already been read back. */
addi sp, sp, 16 * 2 |
||||
vsub.vv v3, v4, v7 |
||||
vadd.vv v1, v5, v6 |
||||
vsub.vv v2, v5, v6 |
||||
vsetvli zero, zero, e16, mf2, ta, ma |
||||
/* Narrow to 16 bits: shift right by 18, rounding per vxrm, with signed */
/* saturation. */
vnclip.wi v4, v0, 18 |
||||
vnclip.wi v5, v1, 18 |
||||
vnclip.wi v6, v2, 18 |
||||
vnclip.wi v7, v3, 18 |
||||
/* Strided stores: each writes 4 values, 32 bytes apart, from its base. */
vsse16.v v4, (a0), t0 |
||||
vsse16.v v5, (t1), t0 |
||||
vsse16.v v6, (t2), t0 |
||||
vsse16.v v7, (t3), t0 |
||||
ret |
||||
endfunc |
||||
#endif
Loading…
Reference in new issue