lavc/sbrdsp: R-V V hf_apply_noise functions

This is restricted to 128-bit vectors as larger vector sizes could read
past the end of the noise array. Support for future hardware with larger
vector sizes is left for some other time.

hf_apply_noise_0_c:       2319.7
hf_apply_noise_0_rvv_f32: 1229.0
hf_apply_noise_1_c:       2539.0
hf_apply_noise_1_rvv_f32: 1244.7
hf_apply_noise_2_c:       2319.7
hf_apply_noise_2_rvv_f32: 1232.7
hf_apply_noise_3_c:       2541.2
hf_apply_noise_3_rvv_f32: 1244.2
release/7.0
Rémi Denis-Courmont 1 year ago
parent 20e6195c54
commit c536e92207
  1. 17
      libavcodec/riscv/sbrdsp_init.c
  2. 67
      libavcodec/riscv/sbrdsp_rvv.S

@ -21,6 +21,7 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/riscv/cpu.h"
#include "libavcodec/sbrdsp.h"
void ff_sbr_sum64x5_rvv(float *z);
@ -32,6 +33,14 @@ void ff_sbr_hf_gen_rvv(float (*X_high)[2], const float (*X_low)[2],
float bw, int start, int end);
void ff_sbr_hf_g_filt_rvv(float (*Y)[2], const float (*X_high)[40][2],
const float *g_filt, int m_max, intptr_t ixh);
/* RISC-V vector implementations of the four hf_apply_noise variants (0..3).
 * They are defined in assembly (sbrdsp_rvv.S) from one shared macro and all
 * take the same argument list. */
void ff_sbr_hf_apply_noise_0_rvv(float (*Y)[2], const float *s,
const float *f, int n, int kx, int max);
void ff_sbr_hf_apply_noise_1_rvv(float (*Y)[2], const float *s,
const float *f, int n, int kx, int max);
void ff_sbr_hf_apply_noise_2_rvv(float (*Y)[2], const float *s,
const float *f, int n, int kx, int max);
void ff_sbr_hf_apply_noise_3_rvv(float (*Y)[2], const float *s,
const float *f, int n, int kx, int max);
av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
{
@ -44,6 +53,14 @@ av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
c->sum_square = ff_sbr_sum_square_rvv;
c->hf_gen = ff_sbr_hf_gen_rvv;
c->hf_g_filt = ff_sbr_hf_g_filt_rvv;
/* Only install the noise functions when the vector length is at most
 * 16 bytes (128 bits): per the commit message, larger vectors could read
 * past the end of the noise array.
 * NOTE(review): assumes ff_get_rv_vlenb() returns the vector register
 * length in bytes - confirm against its definition. */
if (ff_get_rv_vlenb() <= 16) {
c->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_rvv;
c->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_rvv;
/* The odd variants additionally need the basic bit-manipulation flag;
 * presumably because their assembly uses the `min` instruction
 * (TODO confirm the flag-to-extension mapping). */
if (flags & AV_CPU_FLAG_RVB_BASIC) {
c->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_rvv;
c->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_rvv;
}
}
}
c->autocorrelate = ff_sbr_autocorrelate_rvv;
}

@ -243,3 +243,70 @@ func ff_sbr_hf_g_filt_rvv, zve32f
ret
endfunc
// Shared body of the ff_sbr_hf_apply_noise_N_rvv functions; \n (0..3) picks
// the variant at assembly time. The macro ends with `ret`, so it must be the
// last thing expanded in a function.
//
// For each element m, with Y stored as interleaved pairs of floats:
//   even \n: Y[m][0] += (s_m[m] == 0) ? q_filt[m] * noise[0] : +/-s_m[m]
//            Y[m][1] += q_filt[m] * noise[1]   (only where s_m[m] == 0)
//   odd \n:  same with the two fields of Y and of the noise entry swapped,
//            and the sign of s_m[m] taken from v4.
//
// Registers (presumably the C arguments in RISC-V calling convention order):
//   a0  Y pointer, advanced 8 bytes per element
//   a1  s_m pointer, advanced 4 bytes per element
//   a2  q_filt pointer, advanced 4 bytes per element
//   a3  index into ff_sbr_noise_table, kept in 0..511
//   a5  number of elements left to process
//   t0  elements handled per iteration; for odd \n it must already hold the
//       vector length set by the caller
//   v4  (odd \n only) sign mask prepared by the caller
// Clobbers: a6, t0, t6, ft0, v0, v8-v31.
.macro hf_apply_noise n
lla a6, ff_sbr_noise_table
fmv.s.x ft0, zero // ft0 = 0.f, compared against s_m below
addi a6, a6, 8 // skip one 8-byte entry: index a3 addresses table entry a3 + 1
1:
.if \n & 1
// vsetvli may pick a shorter, possibly odd, length before the final chunk.
// Clamping the caller-provided length keeps every chunk but the last at that
// length, so the alternating pattern in v4 stays aligned with the elements.
min t0, t0, a5 // preserve parity of t0 for v4 sign injector
vsetvli zero, t0, e32, m4, ta, mu
.else
vsetvli t0, a5, e32, m4, ta, mu
.endif
// Mask-undisturbed (mu) matters: the masked vfmacc below must leave the
// loaded Y values intact in the elements where s_m != 0.
sh3add t6, a3, a6 // t6 = address of the current noise entry (8 bytes each)
vle32.v v8, (a1) // s_m
sub a5, a5, t0 // a5 = elements remaining after this chunk
vle32.v v12, (a2) // q_filt
sh2add a1, t0, a1 // advance s_m by t0 floats
vmfeq.vf v0, v8, ft0 // s_m == 0.f
vlseg2e32.v v24, (t6) // ff_sbr_noise_table: v24 = first field, v28 = second field
sh2add a2, t0, a2 // advance q_filt by t0 floats
.if \n == 2
vfneg.v v8, v8 // variant 2 adds -s_m instead of +s_m
.endif
.if \n & 1
vfsgnjx.vv v8, v8, v4 // flip sign of s_m where v4 is negative; could equivalently use vxor.vv
.endif
add a3, t0, a3
vlseg2e32.v v16, (a0) // Y: v16 = first field, v20 = second field
andi a3, a3, 0x1ff // wrap the noise index modulo 512
.if \n & 1
vfmul.vv v28, v12, v28 // q_filt * noise second field
vfmacc.vv v16, v12, v24, v0.t // Y first field += q_filt * noise first field where s_m == 0
vmerge.vvm v28, v8, v28, v0 // keep the noise term where s_m == 0, else signed s_m
vfadd.vv v20, v20, v28 // Y second field += selected term
.else
vfmul.vv v24, v12, v24 // q_filt * noise first field
vfmacc.vv v20, v12, v28, v0.t // Y second field += q_filt * noise second field where s_m == 0
vmerge.vvm v24, v8, v24, v0 // keep the noise term where s_m == 0, else s_m
vfadd.vv v16, v16, v24 // Y first field += selected term
.endif
vsseg2e32.v v16, (a0) // store the updated Y pairs
sh3add a0, t0, a0 // advance Y by t0 pairs
bnez a5, 1b
ret
.endm
// Variant 0: even path of the shared macro, s_m is added without negation.
// The macro expansion supplies the final ret.
func ff_sbr_hf_apply_noise_0_rvv, zve32f
hf_apply_noise 0
endfunc
// Variant 3: identical to variant 1 with the opposite sign pattern.
// There is no ret here on purpose: execution continues into the function
// placed immediately after this one, which must be
// ff_sbr_hf_apply_noise_1_rvv. Do not reorder or insert code between them.
func ff_sbr_hf_apply_noise_3_rvv, zve32f
not a4, a4 // invert parity of kx
// fall through
endfunc
// Variant 1: odd path of the shared macro. Builds the per-element sign mask
// in v4 from the parity of kx (a4) and of the element index, and leaves the
// maximum vector length in t0 as the macro expects for odd variants.
// Also the fall-through target of ff_sbr_hf_apply_noise_3_rvv.
func ff_sbr_hf_apply_noise_1_rvv, zve32f
vsetvli t0, zero, e32, m4, ta, ma // t0 = maximum vector length for e32, m4
vid.v v4 // v4[i] = i
vxor.vx v4, v4, a4 // bit 0 of v4[i] = parity of (i ^ kx)
vsll.vi v4, v4, 31 // v4[i] = ((kx ^ i) & 1) ? -0.f : +0.f
hf_apply_noise 1
endfunc
// Variant 2: even path of the shared macro with s_m negated before it is
// added. The macro expansion supplies the final ret.
func ff_sbr_hf_apply_noise_2_rvv, zve32f
hf_apply_noise 2
endfunc

Loading…
Cancel
Save