From 0780ad9c688cc8272daa7780d3f112a9f55208ca Mon Sep 17 00:00:00 2001 From: Muhammad Faiz Date: Fri, 7 Jul 2017 14:43:39 +0700 Subject: [PATCH] avcodec/rdft: remove sintable It is redundant with costable. The first half of sintable is identical to the second half of costable. The second half of sintable is the negation of the first half of sintable. The computation is changed to handle the sign of the sin values, in both the C code and the ARM assembly code. Signed-off-by: Muhammad Faiz --- libavcodec/Makefile | 3 +- libavcodec/arm/rdft_neon.S | 13 +++++--- libavcodec/rdft.c | 68 ++++++++++++++------------------------ libavcodec/rdft.h | 26 ++------------- 4 files changed, 36 insertions(+), 74 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index b440a00746..59029a853c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o OBJS-$(CONFIG_QSVDEC) += qsvdec.o OBJS-$(CONFIG_QSVENC) += qsvenc.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) +OBJS-$(CONFIG_RDFT) += rdft.o OBJS-$(CONFIG_RV34DSP) += rv34dsp.o OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S index 781d976354..eabb92b4bd 100644 --- a/libavcodec/arm/rdft_neon.S +++ b/libavcodec/arm/rdft_neon.S @@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1 lsls r6, r6, #31 bne 1f - add r0, r4, #20 + add r0, r4, #24 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_calc_neon) 1: ldr r12, [r4, #0] @ nbits mov r2, #1 + ldr r8, [r4, #20] @ negative_sin lsl r12, r2, r12 add r0, r5, #8 + lsl r8, r8, #31 add r1, r5, r12, lsl #2 lsr r12, r12, #2 + vdup.32 d26, r8 ldr r2, [r4, #12] @ tcos sub r12, r12, #2 ldr r3, [r4, #16] @ tsin @@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1 vld1.32 {d5}, [r3,:64]! 
@ tsin[i] vmov.f32 d18, #0.5 @ k1 vdup.32 d19, r6 + veor d5, d26, d5 pld [r0, #32] veor d19, d18, d19 @ k2 vmov.i32 d16, #0 @@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1 vld1.32 {d5}, [r3,:64]! @ tsin[i] veor d24, d22, d17 @ ev.re,-ev.im vrev64.32 d3, d23 @ od.re, od.im + veor d5, d26, d5 pld [r2, #32] veor d2, d3, d16 @ -od.re, od.im pld [r3, #32] @@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1 vmul.f32 d22, d22, d18 vst1.32 {d22}, [r5,:64] - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 pop {r4-r8,lr} b X(ff_fft_calc_neon) diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c index c318aa8394..194e0bc4ee 100644 --- a/libavcodec/rdft.c +++ b/libavcodec/rdft.c @@ -28,28 +28,6 @@ * (Inverse) Real Discrete Fourier Transforms. */ -/* sin(2*pi*x/n) for 0<=x>2); i++) { - i1 = 2*i; - i2 = n-i1; - /* Separate even and odd FFTs */ - ev.re = k1*(data[i1 ]+data[i2 ]); - od.im = -k2*(data[i1 ]-data[i2 ]); - ev.im = k1*(data[i1+1]-data[i2+1]); - od.re = k2*(data[i1+1]+data[i2+1]); - /* Apply twiddle factors to the odd FFT and add to the even FFT */ - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; + +#define RDFT_UNMANGLE(sign0, sign1) \ + for (i = 1; i < (n>>2); i++) { \ + i1 = 2*i; \ + i2 = n-i1; \ + /* Separate even and odd FFTs */ \ + ev.re = k1*(data[i1 ]+data[i2 ]); \ + od.im = -k2*(data[i1 ]-data[i2 ]); \ + ev.im = k1*(data[i1+1]-data[i2+1]); \ + od.re = k2*(data[i1+1]+data[i2+1]); \ + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \ + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \ + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \ + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ + } + + if (s->negative_sin) { + 
RDFT_UNMANGLE(+,-) + } else { + RDFT_UNMANGLE(-,+) } + data[2*i+1]=s->sign_convention*data[2*i+1]; if (s->inverse) { data[0] *= k1; @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) s->nbits = nbits; s->inverse = trans == IDFT_C2R || trans == DFT_C2R; s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1; + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C; if (nbits < 4 || nbits > 16) return AVERROR(EINVAL); @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) ff_init_ff_cos_tabs(nbits); s->tcos = ff_cos_tabs[nbits]; - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2); -#if !CONFIG_HARDCODED_TABLES - { - int i; - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n; - for (i = 0; i < (n >> 2); i++) - s->tsin[i] = sin(i * theta); - } -#endif + s->tsin = ff_cos_tabs[nbits] + (n >> 2); s->rdft_calc = rdft_calc_c; if (ARCH_ARM) ff_rdft_init_arm(s); diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h index 37c40e7c80..ffafca7f24 100644 --- a/libavcodec/rdft.h +++ b/libavcodec/rdft.h @@ -25,29 +25,6 @@ #include "config.h" #include "fft.h" -#if CONFIG_HARDCODED_TABLES -# define SINTABLE_CONST const -#else -# define SINTABLE_CONST -#endif - -#define SINTABLE(size) \ - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] - -extern SINTABLE(16); -extern SINTABLE(32); -extern SINTABLE(64); -extern SINTABLE(128); -extern SINTABLE(256); -extern SINTABLE(512); -extern SINTABLE(1024); -extern SINTABLE(2048); -extern SINTABLE(4096); -extern SINTABLE(8192); -extern SINTABLE(16384); -extern SINTABLE(32768); -extern SINTABLE(65536); - struct RDFTContext { int nbits; int inverse; @@ -55,7 +32,8 @@ struct RDFTContext { /* pre/post rotation tables */ const FFTSample *tcos; - SINTABLE_CONST FFTSample *tsin; + const FFTSample *tsin; + int negative_sin; FFTContext fft; void (*rdft_calc)(struct 
RDFTContext *s, FFTSample *z); };