C908: vp9_dc_8x8_8bpp_c: 46.0 vp9_dc_8x8_8bpp_rvv_i64: 41.0 vp9_dc_16x16_8bpp_c: 109.2 vp9_dc_16x16_8bpp_rvv_i32: 72.7 vp9_dc_32x32_8bpp_c: 365.2 vp9_dc_32x32_8bpp_rvv_i32: 165.5 vp9_dc_127_8x8_8bpp_c: 23.0 vp9_dc_127_8x8_8bpp_rvv_i64: 22.0 vp9_dc_127_16x16_8bpp_c: 70.2 vp9_dc_127_16x16_8bpp_rvv_i32: 50.2 vp9_dc_127_32x32_8bpp_c: 295.2 vp9_dc_127_32x32_8bpp_rvv_i32: 136.7 vp9_dc_128_8x8_8bpp_c: 23.0 vp9_dc_128_8x8_8bpp_rvv_i64: 22.0 vp9_dc_128_16x16_8bpp_c: 70.2 vp9_dc_128_16x16_8bpp_rvv_i32: 50.2 vp9_dc_128_32x32_8bpp_c: 295.2 vp9_dc_128_32x32_8bpp_rvv_i32: 136.7 vp9_dc_129_8x8_8bpp_c: 23.0 vp9_dc_129_8x8_8bpp_rvv_i64: 22.0 vp9_dc_129_16x16_8bpp_c: 70.2 vp9_dc_129_16x16_8bpp_rvv_i32: 50.2 vp9_dc_129_32x32_8bpp_c: 295.2 vp9_dc_129_32x32_8bpp_rvv_i32: 136.7 vp9_dc_left_8x8_8bpp_c: 38.0 vp9_dc_left_8x8_8bpp_rvv_i64: 36.0 vp9_dc_left_16x16_8bpp_c: 93.2 vp9_dc_left_16x16_8bpp_rvv_i32: 67.7 vp9_dc_left_32x32_8bpp_c: 333.2 vp9_dc_left_32x32_8bpp_rvv_i32: 158.5 vp9_dc_top_8x8_8bpp_c: 38.7 vp9_dc_top_8x8_8bpp_rvv_i64: 36.0 vp9_dc_top_16x16_8bpp_c: 93.2 vp9_dc_top_16x16_8bpp_rvv_i32: 67.7 vp9_dc_top_32x32_8bpp_c: 333.2 vp9_dc_top_32x32_8bpp_rvv_i32: 156.2 Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>release/7.1
parent
dedc2456bf
commit
c3a96f97f8
6 changed files with 352 additions and 0 deletions
@ -0,0 +1,115 @@ |
||||
/* |
||||
* Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/riscv/asm.S" |
||||
|
||||
/*
 * Finish a DC average and leave the result in t1.
 *
 * size: log2 of the number of samples that were summed.
 *
 * In:   v8  = 8-bit samples, with vl/vtype already set by the caller
 *       v16 = running sum in element 0
 * Out:  t1  = (sum + (1 << (size - 1))) >> size, the mean rounded to nearest
 *
 * On exit vtype/vl are e16, m1, vl=1; callers that go on to use 8-bit
 * vectors must issue their own vset{i}vli again.
 */
.macro avgdc size
        vwredsumu.vs v16, v8, v16              /* v16[0] += widening sum of v8 */
        vsetivli     zero, 1, e16, m1, ta, ma  /* treat the sum as one 16-bit element */
        vmv.x.s      t1, v16
        addi         t1, t1, 1 << (\size - 1)  /* rounding bias */
        srai         t1, t1, \size
.endm
||||
|
||||
/*
 * Compute the DC fill value for one prediction mode and leave it in t1.
 *
 * type: top    - mean of the samples at (a3)
 *       left   - mean of the samples at (a2)
 *       dc     - mean of the samples at (a2) and (a3) together
 *       other  - used verbatim as an immediate (127, 128, 129 below)
 * size: log2 of the number of samples averaged; forwarded to avgdc.
 *
 * With the C prototype (dst, stride, l, a) in the standard calling
 * convention, a2 is the left edge and a3 the above edge.
 *
 * The caller must have set vl/vtype (e8) to cover one edge.  For the three
 * averaging modes avgdc leaves vtype/vl as e16, m1, vl=1.
 *
 * Every mode test is a string comparison (.ifc).  Comparing the mode names
 * with "==" would treat them as undefined symbols in an absolute
 * expression, which not every assembler can evaluate.
 */
.macro getdc type size
.ifc \type,top
        vmv.v.x      v16, zero                 /* clear the accumulator */
        vle8.v       v8, (a3)
        avgdc        \size
.else
.ifc \type,left
        vmv.v.x      v16, zero
        vle8.v       v8, (a2)
        avgdc        \size
.else
.ifc \type,dc
        vmv.v.x      v16, zero
        vle8.v       v8, (a2)
        vwredsumu.vs v16, v8, v16              /* fold the left edge in first */
        vle8.v       v8, (a3)
        avgdc        \size                     /* adds the top edge, then rounds */
.else
        li           t1, \type                 /* fixed-value predictor */
.endif
.endif
.endif
.endm
||||
|
||||
/*
 * Body of a 16x16 or 32x32 DC predictor: compute the fill value, splat it
 * into v0 and store it row by row.
 *
 * type:    prediction mode, forwarded to getdc
 * size:    block width and height (16 or 32)
 * n:       log2 of the number of edge samples averaged, forwarded to getdc
 * restore: 1 when getdc changes vtype (the averaging modes), so the 8-bit
 *          configuration has to be re-established before the splat
 *
 * a0 = dst (advanced by a1 after every row), a1 = stride.
 * t0 is loaded with 32 for the 32x32 case and reused by the restore path.
 */
.macro dc_e32 type size n restore
.ifc \size,32
        li           t0, 32
        vsetvli      zero, t0, e8, m2, ta, ma
.else
        vsetivli     zero, 16, e8, m1, ta, ma
.endif
        getdc        \type \n

.if \restore == 1 && \size == 32
        vsetvli      zero, t0, e8, m2, ta, ma
.elseif \restore == 1 && \size == 16
        vsetivli     zero, 16, e8, m1, ta, ma
.endif
        vmv.v.x      v0, t1                    /* one full row of the DC value */

.rept \size
        vse8.v       v0, (a0)
        add          a0, a0, a1
.endr

        ret
.endm
||||
|
||||
/*
 * Body of an 8x8 DC predictor: compute the fill value, splat it over 64
 * bytes and write all eight rows with a single strided 64-bit store.
 *
 * type:    prediction mode, forwarded to getdc
 * size:    block width and height (8); not referenced in the body
 * n:       log2 of the number of edge samples averaged, forwarded to getdc
 * restore: accepted for symmetry with dc_e32; not referenced, because vtype
 *          is always set again after getdc
 *
 * a0 = dst, a1 = stride.
 */
.macro dc_e64 type size n restore
        vsetivli     zero, 8, e8, mf2, ta, ma  /* 8 edge samples */
        getdc        \type \n

        li           t0, 64
        vsetvli      zero, t0, e8, m4, ta, ma  /* 64 bytes in the v0 group */
        vmv.v.x      v0, t1
        vsetivli     zero, 8, e8, mf2, ta, ma  /* vl = 8: one element per row */
        vsse64.v     v0, (a0), a1              /* 8 bytes per row, rows a1 apart */

        ret
.endm
||||
|
||||
/*
 * Emit one DC predictor named ff_<name>_<size>x<size>_rvv.
 *
 * name:    symbol stem (dc, dc_left, dc_top, dc_127, ...)
 * size:    block width and height; 8 selects dc_e64, anything else dc_e32
 * type:    prediction mode, forwarded to getdc
 * n:       log2 of the number of edge samples averaged
 * restore: forwarded to dc_e32 / dc_e64
 * ext:     passed as the second argument of func (func and endfunc come
 *          from the included asm.S)
 */
.macro func_dc name size type n restore ext
func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
.ifc \size,8
        dc_e64       \type \size \n \restore
.else
        dc_e32       \type \size \n \restore
.endif
endfunc
.endm
||||
|
||||
/*
 * Predictor instantiations.
 *
 * Columns: name, size, type, n, restore, ext.
 * n is log2 of the number of averaged samples (size for one edge, 2 * size
 * for dc) and 0 for the fixed-value predictors, which average nothing.
 * restore is 1 only for the averaging 16x16 and 32x32 variants.
 */
func_dc dc_127  32  127   0  0  zve32x
func_dc dc_127  16  127   0  0  zve32x
func_dc dc_127  8   127   0  0  zve64x
func_dc dc_128  32  128   0  0  zve32x
func_dc dc_128  16  128   0  0  zve32x
func_dc dc_128  8   128   0  0  zve64x
func_dc dc_129  32  129   0  0  zve32x
func_dc dc_129  16  129   0  0  zve32x
func_dc dc_129  8   129   0  0  zve64x
func_dc dc      32  dc    6  1  zve32x
func_dc dc      16  dc    5  1  zve32x
func_dc dc      8   dc    4  0  zve64x
func_dc dc_left 32  left  5  1  zve32x
func_dc dc_left 16  left  4  1  zve32x
func_dc dc_left 8   left  3  0  zve64x
func_dc dc_top  32  top   5  1  zve32x
func_dc dc_top  16  top   4  1  zve32x
func_dc dc_top  8   top   3  0  zve64x
@ -0,0 +1,171 @@ |
||||
/*
|
||||
* Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H |
||||
#define AVCODEC_RISCV_VP9DSP_RISCV_H |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
|
||||
/*
 * RVV DC intra predictors.
 *
 * All of them share one signature: dst and stride describe the destination
 * block, l points at the left edge samples and a at the above edge samples.
 * The fixed-value variants (127/128/129) take the edge pointers only to
 * match that signature.
 */
void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                     const uint8_t *a);
void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                     const uint8_t *a);
void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                   const uint8_t *a);

void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                       const uint8_t *a);

void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                        const uint8_t *a);

void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                       const uint8_t *a);

void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                       const uint8_t *a);

void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                         const uint8_t *a);
void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                       const uint8_t *a);
||||
|
||||
/*
 * Declares ff_{put,avg}_8tap_<type>_<SIZE>{h,v,hv}_rvv.
 *
 * type_idx is accepted but does not appear in the expansion.
 * The expansion deliberately has no trailing semicolon: the invocation
 * supplies it, so no stray empty declaration is left at file scope.
 */
#define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                   \
                                        ptrdiff_t srcstride,                  \
                                        int h, int mx, int my);               \
                                                                              \
void ff_put_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                   \
                                        ptrdiff_t srcstride,                  \
                                        int h, int mx, int my);               \
                                                                              \
void ff_put_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,   \
                                         const uint8_t *src,                  \
                                         ptrdiff_t srcstride,                 \
                                         int h, int mx, int my);              \
                                                                              \
void ff_avg_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                   \
                                        ptrdiff_t srcstride,                  \
                                        int h, int mx, int my);               \
                                                                              \
void ff_avg_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                   \
                                        ptrdiff_t srcstride,                  \
                                        int h, int mx, int my);               \
                                                                              \
void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,   \
                                         const uint8_t *src,                  \
                                         ptrdiff_t srcstride,                 \
                                         int h, int mx, int my)

/* Declares ff_{put,avg}_bilin_<SIZE>{h,v,hv}_rvv; no trailing semicolon. */
#define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE)                                     \
void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,            \
                                const uint8_t *src, ptrdiff_t srcstride,      \
                                int h, int mx, int my);                       \
                                                                              \
void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride,            \
                                const uint8_t *src, ptrdiff_t srcstride,      \
                                int h, int mx, int my);                       \
                                                                              \
void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,           \
                                 const uint8_t *src, ptrdiff_t srcstride,     \
                                 int h, int mx, int my);                      \
                                                                              \
void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,            \
                                const uint8_t *src, ptrdiff_t srcstride,      \
                                int h, int mx, int my);                       \
                                                                              \
void ff_avg_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride,            \
                                const uint8_t *src, ptrdiff_t srcstride,      \
                                int h, int mx, int my);                       \
                                                                              \
void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride,           \
                                 const uint8_t *src, ptrdiff_t srcstride,     \
                                 int h, int mx, int my)

/* Declares ff_copy<SIZE>_rvv and ff_avg<SIZE>_rvv; no trailing semicolon. */
#define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE)                                     \
void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,                   \
                         const uint8_t *src, ptrdiff_t srcstride,             \
                         int h, int mx, int my);                              \
                                                                              \
void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride,                    \
                        const uint8_t *src, ptrdiff_t srcstride,              \
                        int h, int mx, int my)

VP9_8TAP_RISCV_RVV_FUNC(64, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(32, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(16, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(8, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(4, regular, FILTER_8TAP_REGULAR);

VP9_8TAP_RISCV_RVV_FUNC(64, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(32, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(16, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(8, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(4, sharp, FILTER_8TAP_SHARP);

VP9_8TAP_RISCV_RVV_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(4, smooth, FILTER_8TAP_SMOOTH);

VP9_BILINEAR_RISCV_RVV_FUNC(64);
VP9_BILINEAR_RISCV_RVV_FUNC(32);
VP9_BILINEAR_RISCV_RVV_FUNC(16);
VP9_BILINEAR_RISCV_RVV_FUNC(8);
VP9_BILINEAR_RISCV_RVV_FUNC(4);

VP9_COPY_AVG_RISCV_RVV_FUNC(64);
VP9_COPY_AVG_RISCV_RVV_FUNC(32);
VP9_COPY_AVG_RISCV_RVV_FUNC(16);
VP9_COPY_AVG_RISCV_RVV_FUNC(8);
VP9_COPY_AVG_RISCV_RVV_FUNC(4);
||||
|
||||
/*
 * Declares ff_copy<SIZE>_rvi.  The expansion has no trailing semicolon;
 * the invocation supplies it.
 */
#define VP9_COPY_RISCV_RVI_FUNC(SIZE)                                         \
void ff_copy##SIZE##_rvi(uint8_t *dst, ptrdiff_t dststride,                   \
                         const uint8_t *src, ptrdiff_t srcstride,             \
                         int h, int mx, int my)

VP9_COPY_RISCV_RVI_FUNC(8);
VP9_COPY_RISCV_RVI_FUNC(4);

/* The prototype generators are private to this header: drop all of them. */
#undef VP9_8TAP_RISCV_RVV_FUNC
#undef VP9_BILINEAR_RISCV_RVV_FUNC
#undef VP9_COPY_AVG_RISCV_RVV_FUNC
#undef VP9_COPY_RISCV_RVI_FUNC
||||
|
||||
#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
|
@ -0,0 +1,61 @@ |
||||
/*
|
||||
* Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/riscv/cpu.h" |
||||
#include "libavcodec/vp9dsp.h" |
||||
#include "vp9dsp.h" |
||||
|
||||
/**
 * Install the RVV DC intra predictors into dsp when the CPU supports them.
 *
 * Nothing is changed unless bpp is 8.  The 8x8 predictors additionally need
 * AV_CPU_FLAG_RVV_I64, the 16x16 and 32x32 ones AV_CPU_FLAG_RVV_I32; both
 * groups require ff_get_rv_vlenb() >= 16.
 *
 * @param dsp  function table to update
 * @param bpp  bits per pixel of the stream
 */
static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
{
#if HAVE_RVV
    int flags = av_get_cpu_flags();

    if (bpp != 8) {
        return;
    }

    /* ff_get_rv_vlenb() is only reached once the matching flag is set. */
    if ((flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
        dsp->intra_pred[TX_8X8][DC_PRED]      = ff_dc_8x8_rvv;
        dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_127_PRED]  = ff_dc_127_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_128_PRED]  = ff_dc_128_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_129_PRED]  = ff_dc_129_8x8_rvv;
        dsp->intra_pred[TX_8X8][TOP_DC_PRED]  = ff_dc_top_8x8_rvv;
    }

    if ((flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
        dsp->intra_pred[TX_32X32][DC_PRED]      = ff_dc_32x32_rvv;
        dsp->intra_pred[TX_16X16][DC_PRED]      = ff_dc_16x16_rvv;
        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
        dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
        dsp->intra_pred[TX_32X32][DC_127_PRED]  = ff_dc_127_32x32_rvv;
        dsp->intra_pred[TX_16X16][DC_127_PRED]  = ff_dc_127_16x16_rvv;
        dsp->intra_pred[TX_32X32][DC_128_PRED]  = ff_dc_128_32x32_rvv;
        dsp->intra_pred[TX_16X16][DC_128_PRED]  = ff_dc_128_16x16_rvv;
        dsp->intra_pred[TX_32X32][DC_129_PRED]  = ff_dc_129_32x32_rvv;
        dsp->intra_pred[TX_16X16][DC_129_PRED]  = ff_dc_129_16x16_rvv;
        dsp->intra_pred[TX_32X32][TOP_DC_PRED]  = ff_dc_top_32x32_rvv;
        dsp->intra_pred[TX_16X16][TOP_DC_PRED]  = ff_dc_top_16x16_rvv;
    }
#endif
}
||||
|
||||
/**
 * RISC-V entry point for VP9 DSP initialisation.
 *
 * Currently only forwards to the intra-prediction setup.
 *
 * @param dsp       function table to update
 * @param bpp       bits per pixel of the stream
 * @param bitexact  not used by this function
 */
av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
{
    vp9dsp_intrapred_init_rvv(dsp, bpp);
}
Loading…
Reference in new issue