dca_core: convert to lavu/tx

Thanks to Martin Storsjö <martin@martin.st> for fixing and testing the
arm32 and aarch64 changes.
pull/388/head
Lynne 2 years ago
parent 2689038f08
commit e0661fc805
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
  1. 6
      libavcodec/aarch64/synth_filter_init.c
  2. 3
      libavcodec/aarch64/synth_filter_neon.S
  3. 12
      libavcodec/arm/synth_filter_init_arm.c
  4. 5
      libavcodec/arm/synth_filter_neon.S
  5. 5
      libavcodec/arm/synth_filter_vfp.S
  6. 23
      libavcodec/dca_core.c
  7. 5
      libavcodec/dca_core.h
  8. 10
      libavcodec/dcadsp.c
  9. 5
      libavcodec/dcadsp.h
  10. 14
      libavcodec/synth_filter.c
  11. 13
      libavcodec/synth_filter.h
  12. 7
      libavcodec/x86/synth_filter_init.c
  13. 25
      tests/checkasm/synth_filter.c

@ -32,11 +32,11 @@
AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF); AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
#endif #endif
void ff_synth_filter_float_neon(FFTContext *imdct, void ff_synth_filter_float_neon(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512], float synth_buf2[32], const float window[512],
float out[32], const float in[32], float out[32], float in[32],
float scale); float scale, av_tx_fn imdct_fn);
av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s) av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
{ {

@ -43,8 +43,8 @@
.endm .endm
function ff_synth_filter_float_neon, export=1 function ff_synth_filter_float_neon, export=1
mov x9, x7 // imdct_fn parameter
ldr w7, [x2] // *synth_buf_offset ldr w7, [x2] // *synth_buf_offset
ldr x9, [x0, #IMDCT_HALF] // imdct_half function pointer
sxtw x7, w7 sxtw x7, w7
stp x3, x4, [sp, #-64]! stp x3, x4, [sp, #-64]!
add x1, x1, x7, lsl #2 // synth_buf add x1, x1, x7, lsl #2 // synth_buf
@ -58,6 +58,7 @@ function ff_synth_filter_float_neon, export=1
str s0, [sp, #48] str s0, [sp, #48]
mov x2, x6 // in mov x2, x6 // in
mov x3, #4 // sizeof(float)
blr x9 blr x9

@ -26,17 +26,17 @@
#include "libavcodec/fft.h" #include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h" #include "libavcodec/synth_filter.h"
void ff_synth_filter_float_vfp(FFTContext *imdct, void ff_synth_filter_float_vfp(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512], float synth_buf2[32], const float window[512],
float out[32], const float in[32], float out[32], float in[32],
float scale); float scale, av_tx_fn imdct_fn);
void ff_synth_filter_float_neon(FFTContext *imdct, void ff_synth_filter_float_neon(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512], float synth_buf2[32], const float window[512],
float out[32], const float in[32], float out[32], float in[32],
float scale); float scale, av_tx_fn imdct_fn);
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
{ {

@ -31,10 +31,13 @@ function ff_synth_filter_float_neon, export=1
str r12, [r2] str r12, [r2]
ldr r2, [sp, #12*4] @ in ldr r2, [sp, #12*4] @ in
VFP ldr r12, [sp, #13*4] @ imdct_fn
NOVFP ldr r12, [sp, #14*4] @ imdct_fn
mov r3, #4 @ sizeof(float)
mov r9, r1 @ synth_buf mov r9, r1 @ synth_buf
VFP vpush {d0} VFP vpush {d0}
bl X(ff_imdct_half_neon) blx r12
VFP vpop {d0} VFP vpop {d0}
pop {r3} pop {r3}

@ -131,8 +131,11 @@ function ff_synth_filter_float_vfp, export=1
and lr, lr, #512-32 and lr, lr, #512-32
str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call
ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half
mov a4, #4 @ sizeof(float)
VFP ldr r12, [sp, #(16+6+3)*4] @ fetch imdct_fn from stack
NOVFP ldr r12, [sp, #(16+6+4)*4] @ fetch imdct_fn from stack
VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case
bl X(ff_imdct_half_vfp) blx r12
VFP vmov SCALE, s16 VFP vmov SCALE, s16
fmrx OLDFPSCR, FPSCR fmrx OLDFPSCR, FPSCR

@ -2235,7 +2235,8 @@ static int filter_frame_float(DCACoreDecoder *s, AVFrame *frame)
// Filter bank reconstruction // Filter bank reconstruction
s->dcadsp->sub_qmf_float[x96_synth]( s->dcadsp->sub_qmf_float[x96_synth](
&s->synth, &s->synth,
&s->imdct[x96_synth], s->imdct[x96_synth],
s->imdct_fn[x96_synth],
output_samples[spkr], output_samples[spkr],
s->subband_samples[ch], s->subband_samples[ch],
ch < x96_nchannels ? s->x96_subband_samples[ch] : NULL, ch < x96_nchannels ? s->x96_subband_samples[ch] : NULL,
@ -2424,16 +2425,24 @@ av_cold void ff_dca_core_flush(DCACoreDecoder *s)
av_cold int ff_dca_core_init(DCACoreDecoder *s) av_cold int ff_dca_core_init(DCACoreDecoder *s)
{ {
int ret;
float scale = 1.0f;
if (!(s->float_dsp = avpriv_float_dsp_alloc(0))) if (!(s->float_dsp = avpriv_float_dsp_alloc(0)))
return -1; return -1;
if (!(s->fixed_dsp = avpriv_alloc_fixed_dsp(0))) if (!(s->fixed_dsp = avpriv_alloc_fixed_dsp(0)))
return -1; return -1;
ff_dcadct_init(&s->dcadct); ff_dcadct_init(&s->dcadct);
if (ff_mdct_init(&s->imdct[0], 6, 1, 1.0) < 0)
return -1; if ((ret = av_tx_init(&s->imdct[0], &s->imdct_fn[0], AV_TX_FLOAT_MDCT,
if (ff_mdct_init(&s->imdct[1], 7, 1, 1.0) < 0) 1, 32, &scale, 0)) < 0)
return -1; return ret;
if ((ret = av_tx_init(&s->imdct[1], &s->imdct_fn[1], AV_TX_FLOAT_MDCT,
1, 64, &scale, 0)) < 0)
return ret;
ff_synth_filter_init(&s->synth); ff_synth_filter_init(&s->synth);
s->x96_rand = 1; s->x96_rand = 1;
@ -2445,8 +2454,8 @@ av_cold void ff_dca_core_close(DCACoreDecoder *s)
av_freep(&s->float_dsp); av_freep(&s->float_dsp);
av_freep(&s->fixed_dsp); av_freep(&s->fixed_dsp);
ff_mdct_end(&s->imdct[0]); av_tx_uninit(&s->imdct[0]);
ff_mdct_end(&s->imdct[1]); av_tx_uninit(&s->imdct[1]);
av_freep(&s->subband_buffer); av_freep(&s->subband_buffer);
s->subband_size = 0; s->subband_size = 0;

@ -24,6 +24,7 @@
#include "libavutil/float_dsp.h" #include "libavutil/float_dsp.h"
#include "libavutil/fixed_dsp.h" #include "libavutil/fixed_dsp.h"
#include "libavutil/mem_internal.h" #include "libavutil/mem_internal.h"
#include "libavutil/tx.h"
#include "avcodec.h" #include "avcodec.h"
#include "get_bits.h" #include "get_bits.h"
@ -33,7 +34,6 @@
#include "dcadct.h" #include "dcadct.h"
#include "dcamath.h" #include "dcamath.h"
#include "dcahuff.h" #include "dcahuff.h"
#include "fft.h"
#include "synth_filter.h" #include "synth_filter.h"
#define DCA_CHANNELS 7 #define DCA_CHANNELS 7
@ -190,7 +190,8 @@ typedef struct DCACoreDecoder {
DCADSPData dcadsp_data[DCA_CHANNELS]; ///< FIR history buffers DCADSPData dcadsp_data[DCA_CHANNELS]; ///< FIR history buffers
DCADSPContext *dcadsp; DCADSPContext *dcadsp;
DCADCTContext dcadct; DCADCTContext dcadct;
FFTContext imdct[2]; AVTXContext *imdct[2];
av_tx_fn imdct_fn[2];
SynthFilterContext synth; SynthFilterContext synth;
AVFloatDSPContext *float_dsp; AVFloatDSPContext *float_dsp;
AVFixedDSPContext *fixed_dsp; AVFixedDSPContext *fixed_dsp;

@ -114,7 +114,8 @@ static void lfe_x96_float_c(float *dst, const float *src,
} }
static void sub_qmf32_float_c(SynthFilterContext *synth, static void sub_qmf32_float_c(SynthFilterContext *synth,
FFTContext *imdct, AVTXContext *imdct,
av_tx_fn imdct_fn,
float *pcm_samples, float *pcm_samples,
int32_t **subband_samples_lo, int32_t **subband_samples_lo,
int32_t **subband_samples_hi, int32_t **subband_samples_hi,
@ -137,13 +138,14 @@ static void sub_qmf32_float_c(SynthFilterContext *synth,
// One subband sample generates 32 interpolated ones // One subband sample generates 32 interpolated ones
synth->synth_filter_float(imdct, hist1, offset, synth->synth_filter_float(imdct, hist1, offset,
hist2, filter_coeff, hist2, filter_coeff,
pcm_samples, input, scale); pcm_samples, input, scale, imdct_fn);
pcm_samples += 32; pcm_samples += 32;
} }
} }
static void sub_qmf64_float_c(SynthFilterContext *synth, static void sub_qmf64_float_c(SynthFilterContext *synth,
FFTContext *imdct, AVTXContext *imdct,
av_tx_fn imdct_fn,
float *pcm_samples, float *pcm_samples,
int32_t **subband_samples_lo, int32_t **subband_samples_lo,
int32_t **subband_samples_hi, int32_t **subband_samples_hi,
@ -186,7 +188,7 @@ static void sub_qmf64_float_c(SynthFilterContext *synth,
// One subband sample generates 64 interpolated ones // One subband sample generates 64 interpolated ones
synth->synth_filter_float_64(imdct, hist1, offset, synth->synth_filter_float_64(imdct, hist1, offset,
hist2, filter_coeff, hist2, filter_coeff,
pcm_samples, input, scale); pcm_samples, input, scale, imdct_fn);
pcm_samples += 64; pcm_samples += 64;
} }
} }

@ -22,8 +22,8 @@
#define AVCODEC_DCADSP_H #define AVCODEC_DCADSP_H
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/tx.h"
#include "fft.h"
#include "dcadct.h" #include "dcadct.h"
#include "synth_filter.h" #include "synth_filter.h"
@ -47,7 +47,8 @@ typedef struct DCADSPContext {
float *hist, ptrdiff_t len); float *hist, ptrdiff_t len);
void (*sub_qmf_float[2])(SynthFilterContext *synth, void (*sub_qmf_float[2])(SynthFilterContext *synth,
FFTContext *imdct, AVTXContext *imdct,
av_tx_fn imdct_fn,
float *pcm_samples, float *pcm_samples,
int32_t **subband_samples_lo, int32_t **subband_samples_lo,
int32_t **subband_samples_hi, int32_t **subband_samples_hi,

@ -24,15 +24,16 @@
#include "dcamath.h" #include "dcamath.h"
#include "synth_filter.h" #include "synth_filter.h"
static void synth_filter_float(FFTContext *imdct, static void synth_filter_float(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512], float synth_buf2[32], const float window[512],
float out[32], const float in[32], float scale) float out[32], float in[32], float scale,
av_tx_fn imdct_fn)
{ {
float *synth_buf = synth_buf_ptr + *synth_buf_offset; float *synth_buf = synth_buf_ptr + *synth_buf_offset;
int i, j; int i, j;
imdct->imdct_half(imdct, synth_buf, in); imdct_fn(imdct, synth_buf, in, sizeof(float));
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
float a = synth_buf2[i ]; float a = synth_buf2[i ];
@ -60,15 +61,16 @@ static void synth_filter_float(FFTContext *imdct,
*synth_buf_offset = (*synth_buf_offset - 32) & 511; *synth_buf_offset = (*synth_buf_offset - 32) & 511;
} }
static void synth_filter_float_64(FFTContext *imdct, static void synth_filter_float_64(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[64], const float window[1024], float synth_buf2[64], const float window[1024],
float out[64], const float in[64], float scale) float out[64], float in[64], float scale,
av_tx_fn imdct_fn)
{ {
float *synth_buf = synth_buf_ptr + *synth_buf_offset; float *synth_buf = synth_buf_ptr + *synth_buf_offset;
int i, j; int i, j;
imdct->imdct_half(imdct, synth_buf, in); imdct_fn(imdct, synth_buf, in, sizeof(float));
for (i = 0; i < 32; i++) { for (i = 0; i < 32; i++) {
float a = synth_buf2[i ]; float a = synth_buf2[i ];

@ -21,19 +21,20 @@
#ifndef AVCODEC_SYNTH_FILTER_H #ifndef AVCODEC_SYNTH_FILTER_H
#define AVCODEC_SYNTH_FILTER_H #define AVCODEC_SYNTH_FILTER_H
#include "fft.h" #include "libavutil/tx.h"
#include "dcadct.h" #include "dcadct.h"
typedef struct SynthFilterContext { typedef struct SynthFilterContext {
void (*synth_filter_float)(FFTContext *imdct, void (*synth_filter_float)(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512], float synth_buf2[32], const float window[512],
float out[32], const float in[32], float out[32], float in[32],
float scale); float scale, av_tx_fn imdct_fn);
void (*synth_filter_float_64)(FFTContext *imdct, void (*synth_filter_float_64)(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[64], const float window[1024], float synth_buf2[64], const float window[1024],
float out[64], const float in[64], float scale); float out[64], float in[64], float scale,
av_tx_fn imdct_fn);
void (*synth_filter_fixed)(DCADCTContext *imdct, void (*synth_filter_fixed)(DCADCTContext *imdct,
int32_t *synth_buf_ptr, int *synth_buf_offset, int32_t *synth_buf_ptr, int *synth_buf_offset,
int32_t synth_buf2[32], const int32_t window[512], int32_t synth_buf2[32], const int32_t window[512],

@ -27,14 +27,15 @@
void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \ void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \
const float window[512], \ const float window[512], \
float out[32], intptr_t offset, float scale); \ float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct, \ static void synth_filter_##opt(AVTXContext *imdct, \
float *synth_buf_ptr, int *synth_buf_offset, \ float *synth_buf_ptr, int *synth_buf_offset, \
float synth_buf2[32], const float window[512], \ float synth_buf2[32], const float window[512], \
float out[32], const float in[32], float scale) \ float out[32], float in[32], float scale, \
av_tx_fn imdct_fn) \
{ \ { \
float *synth_buf= synth_buf_ptr + *synth_buf_offset; \ float *synth_buf= synth_buf_ptr + *synth_buf_offset; \
\ \
imdct->imdct_half(imdct, synth_buf, in); \ imdct_fn(imdct, synth_buf, in, sizeof(float)); \
\ \
ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window, \ ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window, \
out, *synth_buf_offset, scale); \ out, *synth_buf_offset, scale); \

@ -26,6 +26,7 @@
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/intfloat.h" #include "libavutil/intfloat.h"
#include "libavutil/mem_internal.h" #include "libavutil/mem_internal.h"
#include "libavutil/tx.h"
#include "libavcodec/dcadata.h" #include "libavcodec/dcadata.h"
#include "libavcodec/synth_filter.h" #include "libavcodec/synth_filter.h"
@ -45,10 +46,12 @@
void checkasm_check_synth_filter(void) void checkasm_check_synth_filter(void)
{ {
FFTContext imdct; float scale = 1.0;
AVTXContext *imdct;
av_tx_fn imdct_fn;
SynthFilterContext synth; SynthFilterContext synth;
ff_mdct_init(&imdct, 6, 1, 1.0); av_tx_init(&imdct, &imdct_fn, AV_TX_FLOAT_MDCT, 0, 16, &scale, 0);
ff_synth_filter_init(&synth); ff_synth_filter_init(&synth);
if (check_func(synth.synth_filter_float, "synth_filter_float")) { if (check_func(synth.synth_filter_float, "synth_filter_float")) {
@ -65,8 +68,8 @@ void checkasm_check_synth_filter(void)
float scale = 1.0f; float scale = 1.0f;
int i, offset0 = 0, offset1 = 0, offset_b = 0; int i, offset0 = 0, offset1 = 0, offset_b = 0;
declare_func(void, FFTContext *, float *, int *, float[32], const float[512], declare_func(void, AVTXContext *, float *, int *,
float[32], float[32], float); float[32], const float[512], float[32], float[32], float, av_tx_fn);
memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE); memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE); memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
@ -86,10 +89,10 @@ void checkasm_check_synth_filter(void)
randomize_input(); randomize_input();
call_ref(&imdct, buf0, &offset0, buf2_0, window, call_ref(imdct, buf0, &offset0, buf2_0, window,
out0, in, scale); out0, in, scale, imdct_fn);
call_new(&imdct, buf1, &offset1, buf2_1, window, call_new(imdct, buf1, &offset1, buf2_1, window,
out1, in, scale); out1, in, scale, imdct_fn);
if (offset0 != offset1) { if (offset0 != offset1) {
fail(); fail();
@ -113,11 +116,11 @@ void checkasm_check_synth_filter(void)
} }
} }
bench_new(&imdct, buf_b, &offset_b, buf2_b, window, bench_new(imdct, buf_b, &offset_b, buf2_b, window,
out_b, in, scale); out_b, in, scale, imdct_fn);
} }
} }
ff_mdct_end(&imdct); av_tx_uninit(&imdct);
report("synth_filter"); report("synth_filter");
} }

Loading…
Cancel
Save