lavc/ac3dsp: R-V V sum_square_butterfly_int32

ac3_sum_square_bufferfly_int32_c:       61.0
ac3_sum_square_bufferfly_int32_rvv_i64: 14.7
release/7.1
Rémi Denis-Courmont 7 months ago
parent 95568c4e31
commit 6459966beb
  1. 6
      libavcodec/riscv/ac3dsp_init.c
  2. 41
      libavcodec/riscv/ac3dsp_rvv.S

@ -28,6 +28,8 @@
void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
/* RVV assembly implementation of the AC-3 sum-of-squares butterfly.
 * Arguments, in order:
 *   1. output: receives four consecutive int64_t sums
 *      (first input squared, second input squared, (first+second) squared,
 *      (first-second) squared, each summed over all elements);
 *   2. first input array of int32_t;
 *   3. second input array of int32_t;
 *   4. number of elements to process from each input array. */
void ff_sum_square_butterfly_int32_rvv(int64_t *, const int32_t *,
const int32_t *, int);
av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
{
@ -39,6 +41,10 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
c->extract_exponents = ff_extract_exponents_rvb;
if (flags & AV_CPU_FLAG_RVV_F32)
c->float_to_fixed24 = ff_float_to_fixed24_rvv;
# if __riscv_xlen >= 64
if (flags & AV_CPU_FLAG_RVV_I64)
c->sum_square_butterfly_int32 = ff_sum_square_butterfly_int32_rvv;
# endif
}
#endif
}

@ -37,3 +37,44 @@ func ff_float_to_fixed24_rvv, zve32f
ret
endfunc
#if __riscv_xlen >= 64
// Sum-of-squares butterfly over two int32 arrays x and y:
//   out[0] = sum(x[i] * x[i])
//   out[1] = sum(y[i] * y[i])
//   out[2] = sum((x[i] + y[i]) * (x[i] + y[i]))
//   out[3] = sum((x[i] - y[i]) * (x[i] - y[i]))
// The sum and difference are formed at 32 bits; the products are widened
// to 64 bits and accumulated at 64 bits.
// Register usage in this routine:
//   a0 = out (four 64-bit results stored at byte offsets 0, 8, 16, 24)
//   a1 = x, a2 = y (32-bit elements), a3 = remaining element count
// NOTE(review): independent instructions are interleaved throughout,
// presumably for scheduling; keep the ordering when editing.
func ff_sum_square_butterfly_int32_rvv, zve64x
// Clear the accumulators: at SEW=64/LMUL=8 these two moves zero v0-v7 and
// v8-v15, i.e. the four LMUL=4 accumulator groups v0, v4, v8 and v12.
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero
1:
// Process up to VLMAX 32-bit elements per pass (t0 = elements this pass).
// Tail-undisturbed ("tu") so that, on a short final pass, accumulator
// elements beyond vl keep the partial sums from earlier passes.
vsetvli t0, a3, e32, m2, tu, ma
// v16 = x[...]
vle32.v v16, (a1)
// Remaining count -= elements handled in this pass.
sub a3, a3, t0
// v20 = y[...]
vle32.v v20, (a2)
// a1 += t0 * 4 bytes
sh2add a1, t0, a1
// v24 = x + y (32-bit)
vadd.vv v24, v16, v20
// a2 += t0 * 4 bytes
sh2add a2, t0, a2
// v28 = x - y (32-bit)
vsub.vv v28, v16, v20
// Signed widening multiply-accumulate: 64-bit accumulator += 32x32 product.
vwmacc.vv v0, v16, v16
vwmacc.vv v4, v20, v20
vwmacc.vv v8, v24, v24
vwmacc.vv v12, v28, v28
bnez a3, 1b
// Horizontal reduction over every 64-bit accumulator element (VLMAX at
// SEW=64/LMUL=4). For each accumulator: seed element 0 of a scratch
// register with zero, then vredsum.vs writes seed + sum(all elements)
// into element 0 of that scratch register.
vsetvli t0, zero, e64, m4, ta, ma
vmv.s.x v16, zero
vmv.s.x v17, zero
vredsum.vs v16, v0, v16
vmv.s.x v18, zero
vredsum.vs v17, v4, v17
vmv.s.x v19, zero
vredsum.vs v18, v8, v18
// Move each reduced total to a scalar register and store it to out[0..3].
vmv.x.s t0, v16
vredsum.vs v19, v12, v19
vmv.x.s t1, v17
sd t0, (a0)
vmv.x.s t2, v18
sd t1, 8(a0)
vmv.x.s t3, v19
sd t2, 16(a0)
sd t3, 24(a0)
ret
endfunc
#endif

Loading…
Cancel
Save