|
|
|
@ -940,12 +940,12 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = { |
|
|
|
|
.prio = FF_TX_PRIO_MIN, |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s, |
|
|
|
|
const FFTXCodelet *cd, |
|
|
|
|
uint64_t flags, |
|
|
|
|
FFTXCodeletOptions *opts, |
|
|
|
|
int len, int inv, |
|
|
|
|
const void *scale) |
|
|
|
|
static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s, |
|
|
|
|
const FFTXCodelet *cd, |
|
|
|
|
uint64_t flags, |
|
|
|
|
FFTXCodeletOptions *opts, |
|
|
|
|
int len, int inv, |
|
|
|
|
const void *scale) |
|
|
|
|
{ |
|
|
|
|
int ret; |
|
|
|
|
FFTXCodeletOptions sub_opts = { .invert_lookup = inv }; |
|
|
|
@ -955,32 +955,49 @@ static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s, |
|
|
|
|
|
|
|
|
|
flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */ |
|
|
|
|
flags |= AV_TX_INPLACE; /* in-place */ |
|
|
|
|
flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */ |
|
|
|
|
flags |= FF_TX_PRESHUFFLE; /* First try with an in-place transform */ |
|
|
|
|
|
|
|
|
|
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1, |
|
|
|
|
inv, scale))) |
|
|
|
|
return ret; |
|
|
|
|
inv, scale))) { |
|
|
|
|
flags &= ~FF_TX_PRESHUFFLE; /* Now try with a generic FFT */ |
|
|
|
|
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1, |
|
|
|
|
inv, scale))) |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* If we need to preshuffle just steal the map from the subcontext */ |
|
|
|
|
if (s->sub[0].flags & FF_TX_PRESHUFFLE) { |
|
|
|
|
s->map = s->sub[0].map; |
|
|
|
|
s->sub[0].map = NULL; |
|
|
|
|
} else { |
|
|
|
|
s->map = av_malloc((len >> 1)*sizeof(*s->map)); |
|
|
|
|
if (!s->map) |
|
|
|
|
return AVERROR(ENOMEM); |
|
|
|
|
|
|
|
|
|
for (int i = 0; i < len >> 1; i++) |
|
|
|
|
s->map[i] = i; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->sub->map : NULL))) |
|
|
|
|
if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL))) |
|
|
|
|
return ret; |
|
|
|
|
|
|
|
|
|
/* Saves a multiply in a hot path. */ |
|
|
|
|
if (inv) |
|
|
|
|
for (int i = 0; i < (s->len >> 1); i++) |
|
|
|
|
s->sub->map[i] <<= 1; |
|
|
|
|
s->map[i] <<= 1; |
|
|
|
|
|
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
ptrdiff_t stride) |
|
|
|
|
static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
ptrdiff_t stride) |
|
|
|
|
{ |
|
|
|
|
TXSample *src = _src, *dst = _dst; |
|
|
|
|
TXComplex *exp = s->exp, tmp, *z = _dst; |
|
|
|
|
const int len2 = s->len >> 1; |
|
|
|
|
const int len4 = s->len >> 2; |
|
|
|
|
const int len3 = len2 * 3; |
|
|
|
|
const int *sub_map = s->sub->map; |
|
|
|
|
const int *sub_map = s->map; |
|
|
|
|
|
|
|
|
|
stride /= sizeof(*dst); |
|
|
|
|
|
|
|
|
@ -1011,14 +1028,14 @@ static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
ptrdiff_t stride) |
|
|
|
|
static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
ptrdiff_t stride) |
|
|
|
|
{ |
|
|
|
|
TXComplex *z = _dst, *exp = s->exp; |
|
|
|
|
const TXSample *src = _src, *in1, *in2; |
|
|
|
|
const int len2 = s->len >> 1; |
|
|
|
|
const int len4 = s->len >> 2; |
|
|
|
|
const int *sub_map = s->sub->map; |
|
|
|
|
const int *sub_map = s->map; |
|
|
|
|
|
|
|
|
|
stride /= sizeof(*src); |
|
|
|
|
in1 = src; |
|
|
|
@ -1043,28 +1060,28 @@ static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_fwd_def) = { |
|
|
|
|
.name = TX_NAME_STR("mdct_sr_fwd"), |
|
|
|
|
.function = TX_NAME(ff_tx_mdct_sr_fwd), |
|
|
|
|
static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = { |
|
|
|
|
.name = TX_NAME_STR("mdct_fwd"), |
|
|
|
|
.function = TX_NAME(ff_tx_mdct_fwd), |
|
|
|
|
.type = TX_TYPE(MDCT), |
|
|
|
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, |
|
|
|
|
.factors[0] = 2, |
|
|
|
|
.factors = { 2, TX_FACTOR_ANY }, |
|
|
|
|
.min_len = 2, |
|
|
|
|
.max_len = TX_LEN_UNLIMITED, |
|
|
|
|
.init = TX_NAME(ff_tx_mdct_sr_init), |
|
|
|
|
.init = TX_NAME(ff_tx_mdct_init), |
|
|
|
|
.cpu_flags = FF_TX_CPU_FLAGS_ALL, |
|
|
|
|
.prio = FF_TX_PRIO_BASE, |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_inv_def) = { |
|
|
|
|
.name = TX_NAME_STR("mdct_sr_inv"), |
|
|
|
|
.function = TX_NAME(ff_tx_mdct_sr_inv), |
|
|
|
|
static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = { |
|
|
|
|
.name = TX_NAME_STR("mdct_inv"), |
|
|
|
|
.function = TX_NAME(ff_tx_mdct_inv), |
|
|
|
|
.type = TX_TYPE(MDCT), |
|
|
|
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, |
|
|
|
|
.factors[0] = 2, |
|
|
|
|
.factors = { 2, TX_FACTOR_ANY }, |
|
|
|
|
.min_len = 2, |
|
|
|
|
.max_len = TX_LEN_UNLIMITED, |
|
|
|
|
.init = TX_NAME(ff_tx_mdct_sr_init), |
|
|
|
|
.init = TX_NAME(ff_tx_mdct_init), |
|
|
|
|
.cpu_flags = FF_TX_CPU_FLAGS_ALL, |
|
|
|
|
.prio = FF_TX_PRIO_BASE, |
|
|
|
|
}; |
|
|
|
@ -1477,8 +1494,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = { |
|
|
|
|
&TX_NAME(ff_tx_fft_pfa_9xM_def), |
|
|
|
|
&TX_NAME(ff_tx_fft_pfa_15xM_def), |
|
|
|
|
&TX_NAME(ff_tx_fft_naive_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_sr_fwd_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_sr_inv_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_fwd_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_inv_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def), |
|
|
|
|
&TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def), |
|
|
|
|