@@ -41,6 +41,7 @@ COSTABLE(65536);
 COSTABLE(131072);
 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_7))[3];
+DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_9))[4];
 
 static FFTSample * const cos_tabs[18] = {
     NULL,
@@ -111,10 +112,19 @@ static av_cold void ff_init_7_tabs(void)
     TX_NAME(ff_cos_7)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 14)), RESCALE(sin(2 * M_PI / 14)) };
 }
 
+static av_cold void ff_init_9_tabs(void)
+{
+    TX_NAME(ff_cos_9)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI /  3)), RESCALE( sin(2 * M_PI /  3)) };
+    TX_NAME(ff_cos_9)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI /  9)), RESCALE( sin(2 * M_PI /  9)) };
+    TX_NAME(ff_cos_9)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 36)), RESCALE( sin(2 * M_PI / 36)) };
+    TX_NAME(ff_cos_9)[3] = (FFTComplex){ TX_NAME(ff_cos_9)[1].re + TX_NAME(ff_cos_9)[2].im,
+                                         TX_NAME(ff_cos_9)[1].im - TX_NAME(ff_cos_9)[2].re };
+}
+
 static CosTabsInitOnce cos_tabs_init_once[] = {
     { ff_init_53_tabs, AV_ONCE_INIT },
     { ff_init_7_tabs, AV_ONCE_INIT },
-    { NULL },
+    { ff_init_9_tabs, AV_ONCE_INIT },
     { NULL },
     { init_cos_tabs_16, AV_ONCE_INIT },
     { init_cos_tabs_32, AV_ONCE_INIT },
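ff_init_9_tabs() mirrors ff_init_7_tabs() above: the first three entries are plain twiddle values, while ff_cos_9[3] is a precombined constant built from entries [1] and [2] so the transform kernel can use it directly. Its initializer takes over the previously unused slot at index 2 of cos_tabs_init_once[], which lines up with the init_cos_tabs(2) call added for n == 9 at the end of this patch.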
@@ -299,6 +309,130 @@ static av_always_inline void fft7(FFTComplex *out, FFTComplex *in,
     out[6*stride].im = in[0].im + z[0].im;
 }
 
+static av_always_inline void fft9(FFTComplex *out, FFTComplex *in,
+                                  ptrdiff_t stride)
+{
+    const FFTComplex *tab = TX_NAME(ff_cos_9);
+    FFTComplex t[16], w[4], x[5], y[5], z[2];
+#ifdef TX_INT32
+    int64_t mtmp[12];
+#endif
+
+    BF(t[1].re, t[0].re, in[1].re, in[8].re);
+    BF(t[1].im, t[0].im, in[1].im, in[8].im);
+    BF(t[3].re, t[2].re, in[2].re, in[7].re);
+    BF(t[3].im, t[2].im, in[2].im, in[7].im);
+    BF(t[5].re, t[4].re, in[3].re, in[6].re);
+    BF(t[5].im, t[4].im, in[3].im, in[6].im);
+    BF(t[7].re, t[6].re, in[4].re, in[5].re);
+    BF(t[7].im, t[6].im, in[4].im, in[5].im);
+
+    w[0].re = t[0].re - t[6].re;
+    w[0].im = t[0].im - t[6].im;
+    w[1].re = t[2].re - t[6].re;
+    w[1].im = t[2].im - t[6].im;
+    w[2].re = t[1].re - t[7].re;
+    w[2].im = t[1].im - t[7].im;
+    w[3].re = t[3].re + t[7].re;
+    w[3].im = t[3].im + t[7].im;
+
+    z[0].re = in[0].re + t[4].re;
+    z[0].im = in[0].im + t[4].im;
+
+    z[1].re = t[0].re + t[2].re + t[6].re;
+    z[1].im = t[0].im + t[2].im + t[6].im;
+
+    out[0*stride].re = z[0].re + z[1].re;
+    out[0*stride].im = z[0].im + z[1].im;
+
+#ifdef TX_INT32
+    mtmp[0] = t[1].re - t[3].re + t[7].re;
+    mtmp[1] = t[1].im - t[3].im + t[7].im;
+
+    y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
+    y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
+
+    mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
+    mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
+    mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
+    mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);
+
+    x[3].re = z[0].re + (int32_t)mtmp[0];
+    x[3].im = z[0].im + (int32_t)mtmp[1];
+    z[0].re = in[0].re + (int32_t)mtmp[2];
+    z[0].im = in[0].im + (int32_t)mtmp[3];
+
+    mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
+    mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
+    mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
+    mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
+    mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
+    mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
+    mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
+    mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
+
+    x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
+    x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
+    x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
+    x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
+    y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
+    y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
+    y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
+    y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
+
+    y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
+    y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
+
+#else
+    y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
+    y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
+
+    x[3].re = z[0].re + tab[0].re*z[1].re;
+    x[3].im = z[0].im + tab[0].re*z[1].im;
+    z[0].re = in[0].re + tab[0].re*t[4].re;
+    z[0].im = in[0].im + tab[0].re*t[4].im;
+
+    x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
+    x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
+    x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
+    x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
+    y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
+    y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
+    y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
+    y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
+
+    y[0].re = tab[0].im*t[5].re;
+    y[0].im = tab[0].im*t[5].im;
+#endif
+
+    x[4].re = x[1].re + x[2].re;
+    x[4].im = x[1].im + x[2].im;
+
+    y[4].re = y[1].re - y[2].re;
+    y[4].im = y[1].im - y[2].im;
+    x[1].re = z[0].re + x[1].re;
+    x[1].im = z[0].im + x[1].im;
+    y[1].re = y[0].re + y[1].re;
+    y[1].im = y[0].im + y[1].im;
+    x[2].re = z[0].re + x[2].re;
+    x[2].im = z[0].im + x[2].im;
+    y[2].re = y[2].re - y[0].re;
+    y[2].im = y[2].im - y[0].im;
+    x[4].re = z[0].re - x[4].re;
+    x[4].im = z[0].im - x[4].im;
+    y[4].re = y[0].re - y[4].re;
+    y[4].im = y[0].im - y[4].im;
+
+    out[1*stride] = (FFTComplex){ x[1].re + y[1].im, x[1].im - y[1].re };
+    out[2*stride] = (FFTComplex){ x[2].re + y[2].im, x[2].im - y[2].re };
+    out[3*stride] = (FFTComplex){ x[3].re + y[3].im, x[3].im - y[3].re };
+    out[4*stride] = (FFTComplex){ x[4].re + y[4].im, x[4].im - y[4].re };
+    out[5*stride] = (FFTComplex){ x[4].re - y[4].im, x[4].im + y[4].re };
+    out[6*stride] = (FFTComplex){ x[3].re - y[3].im, x[3].im + y[3].re };
+    out[7*stride] = (FFTComplex){ x[2].re - y[2].im, x[2].im + y[2].re };
+    out[8*stride] = (FFTComplex){ x[1].re - y[1].im, x[1].im + y[1].re };
+}
+
 static av_always_inline void fft15(FFTComplex *out, FFTComplex *in,
                                    ptrdiff_t stride)
 {
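A note on the new kernel: as in fft7() above, the BF() butterflies split the input into sums and differences of the mirrored pairs (1,8), (2,7), (3,6) and (4,5); out[0] is the plain sum of all nine inputs, and the remaining outputs come out as conjugate pairs (out[k] and out[9 - k] differ only in the sign of the y[] contributions). In the TX_INT32 branch the ff_cos_9 values are Q31 fixed point (as produced by RESCALE() in the integer template), so each product is widened to 64 bits and rounded back with "+ 0x40000000 >> 31"; since "+" binds tighter than ">>" this is (a * b + (1 << 30)) >> 31, i.e. a round-to-nearest Q31 multiply.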
@@ -472,6 +606,7 @@ static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
 DECL_COMP_FFT(3)
 DECL_COMP_FFT(5)
 DECL_COMP_FFT(7)
+DECL_COMP_FFT(9)
 DECL_COMP_FFT(15)
 
 static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
@@ -570,6 +705,7 @@ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
 DECL_COMP_IMDCT(3)
 DECL_COMP_IMDCT(5)
 DECL_COMP_IMDCT(7)
+DECL_COMP_IMDCT(9)
 DECL_COMP_IMDCT(15)
 
 #define DECL_COMP_MDCT(N) \
@@ -619,6 +755,7 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
 DECL_COMP_MDCT(3)
 DECL_COMP_MDCT(5)
 DECL_COMP_MDCT(7)
+DECL_COMP_MDCT(9)
 DECL_COMP_MDCT(15)
 
 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
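These three instantiations provide compound_fft_9xM, compound_imdct_9xM and compound_mdct_9xM, which the dispatch changes below select whenever the odd factor n is 9.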
@@ -773,6 +910,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
         SRC /= FACTOR;                                             \
     }
     CHECK_FACTOR(n, 15, len)
+    CHECK_FACTOR(n, 9, len)
     CHECK_FACTOR(n, 7, len)
     CHECK_FACTOR(n, 5, len)
     CHECK_FACTOR(n, 3, len)
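The new factor sits between 15 and 7, keeping the candidates ordered from largest to smallest. As with the existing factors, only the first one that divides the length is taken (the macro's guard, just above this hunk, stops matching once a factor has been picked), so a length such as 36 should resolve to n = 9 with m = 4.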
@@ -817,11 +955,13 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
             *tx = n == 3 ? compound_fft_3xM :
                   n == 5 ? compound_fft_5xM :
                   n == 7 ? compound_fft_7xM :
+                  n == 9 ? compound_fft_9xM :
                            compound_fft_15xM;
             if (is_mdct)
                 *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
                       n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
                       n == 7 ? inv ? compound_imdct_7xM : compound_mdct_7xM :
+                      n == 9 ? inv ? compound_imdct_9xM : compound_mdct_9xM :
                                inv ? compound_imdct_15xM : compound_mdct_15xM;
         }
     } else { /* Direct transform case */
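For context, a minimal sketch of how the new path is reached through the public API (illustrative only: the names come from libavutil/tx.h, and the stride and scale conventions below are my reading of that header rather than anything this patch defines):

    #include <libavutil/tx.h>

    /* 36 = 9 * 4, so ff_tx_init_mdct_fft() should pick n = 9, m = 4 and
     * dispatch a forward float FFT to compound_fft_9xM. */
    static int test_fft36(void)
    {
        AVTXContext *ctx = NULL;
        av_tx_fn fn;
        AVComplexFloat in[36] = {{ 1.0f, 0.0f }}, out[36]; /* unit impulse */
        float scale = 1.0f;  /* assumed: only meaningful for MDCT types */
        int ret = av_tx_init(&ctx, &fn, AV_TX_FLOAT_FFT, 0, 36, &scale, 0);
        if (ret < 0)
            return ret;
        fn(ctx, out, in, sizeof(AVComplexFloat)); /* stride: see tx.h docs */
        av_tx_uninit(&ctx);
        return 0;
    }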
@@ -834,6 +974,8 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
         init_cos_tabs(0);
     else if (n == 7)
         init_cos_tabs(1);
+    else if (n == 9)
+        init_cos_tabs(2);
 
     if (m != 1 && !(m & (m - 1))) {
         if ((err = ff_tx_gen_ptwo_revtab(s, n == 1 && !is_mdct && !(flags & AV_TX_INPLACE))))