From f20b67173ca6a05b8c3dee02dad3b7243b96292b Mon Sep 17 00:00:00 2001
From: Rostislav Pehlivanov <atomnuker@gmail.com>
Date: Sat, 29 Aug 2015 06:47:31 +0100
Subject: [PATCH] aacenc_tns: rework the way coefficients are calculated

This commit abandons the way the specifications state to
quantize the coefficients, makes use of the new LPC float
functions and is much better.

The original way of converting non-normalized float samples
to the int32_t values which our LPC system expects was wrong, and
it was wrong to assume the coefficients that are generated are also
valid. It was essentially a full garbage-in, garbage-out
system and it definitely shows when looking at spectrograms
and listening. The high frequencies were very overattenuated.
The new LPC function performs the analysis directly.

The specifications state to quantize the coefficients into
four bit index values using an asin() function, which of course
had to have ugly ternary operators because the function turns
negative if the coefficients are negative, which when encoding
causes an invalid bitstream to be generated.

This commit deviates from that by using the direct TNS tables,
which are fairly small since you only have 4 bits at most for index
values. The LPC values are directly quantized against the tables
and are then used to perform filtering after the requantization,
which simply fetches the array values.

The end result is that TNS works much better now and doesn't
attenuate anything but the actual signal, i.e. TNS removes
quantization errors and does its job correctly now.

It might be enabled by default soon since it doesn't hurt and
helps reduce nastiness at low bitrates.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 libavcodec/aaccoder.c   |   4 +
 libavcodec/aacenc.c     |   5 +-
 libavcodec/aacenc.h     |   1 +
 libavcodec/aacenc_tns.c | 211 +++++++++++++++++++---------------------
 libavcodec/aacenc_tns.h |  11 +--
 5 files changed, 110 insertions(+), 122 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 8256f961c0..86d598f021 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -964,6 +964,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -979,6 +980,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -994,6 +996,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -1009,6 +1012,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index a7c43c7411..9f59b9213a 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -404,10 +404,9 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
     encode_band_info(s, sce);
     encode_scale_factors(avctx, s, sce);
     encode_pulses(s, &sce->pulse);
+    put_bits(&s->pb, 1, !!sce->tns.present);
     if (s->coder->encode_tns_info)
         s->coder->encode_tns_info(s, sce);
-    else
-        put_bits(&s->pb, 1, 0);
     put_bits(&s->pb, 1, 0); //ssr
     encode_spectral_coeffs(s, sce);
     return 0;
@@ -609,6 +608,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     s->coder->search_for_pns(s, avctx, sce);
                 if (s->options.tns && s->coder->search_for_tns)
                     s->coder->search_for_tns(s, sce);
+                if (s->options.tns && s->coder->apply_tns_filt)
+                    s->coder->apply_tns_filt(sce);
                 if (sce->tns.present)
                     tns_mode = 1;
             }
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 69a8c01d82..51dce8a0fd 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -63,6 +63,7 @@ typedef struct AACCoefficientsEncoder {
     void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
     void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
     void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*apply_tns_filt)(SingleChannelElement *sce);
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
     void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
     void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c
index 8c21255f7b..912229e692 100644
--- a/libavcodec/aacenc_tns.c
+++ b/libavcodec/aacenc_tns.c
@@ -31,62 +31,30 @@
 #include "aacenc_utils.h"
 #include "aacenc_quantization.h"
 
-static inline void conv_to_int32(int32_t *loc, float *samples, int num, float norm)
-{
-    int i;
-    for (i = 0; i < num; i++)
-        loc[i] = ceilf((samples[i]/norm)*INT32_MAX);
-}
-
-static inline void conv_to_float(float *arr, int32_t *cof, int num)
-{
-    int i;
-    for (i = 0; i < num; i++)
-        arr[i] = (float)cof[i]/INT32_MAX;
-}
-
-/* Input: quantized 4 bit coef, output: 1 if first (MSB) 2 bits are the same */
-static inline int coef_test_compression(int coef)
-{
-    int tmp = coef >> 2;
-    int res = ff_ctz(tmp);
-    if (res > 1)
-        return 1;       /* ...00 ->  compressable    */
-    else if (res == 1)
-        return 0;       /* ...10 ->  uncompressable  */
-    else if (ff_ctz(tmp >> 1) > 0)
-        return 0;       /* ...0 1 -> uncompressable  */
-    else
-        return 1;       /* ...1 1 -> compressable    */
-}
-
 static inline int compress_coef(int *coefs, int num)
 {
-    int i, res = 0;
+    int i, c = 0;
     for (i = 0; i < num; i++)
-        res += coef_test_compression(coefs[i]);
-    return res == num ? 1 : 0;
+        c += coefs[i] < 4 || coefs[i] > 11;
+    return c == num;
 }
 
 /**
  * Encode TNS data.
- * Coefficient compression saves a single bit.
+ * Coefficient compression saves a single bit per coefficient.
  */
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
 {
     int i, w, filt, coef_len, coef_compress;
-    const int coef_res = MAX_LPC_PRECISION == 4 ? 1 : 0;
     const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
 
-    put_bits(&s->pb, 1, !!sce->tns.present);
-
     if (!sce->tns.present)
         return;
 
     for (i = 0; i < sce->ics.num_windows; i++) {
         put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]);
         if (sce->tns.n_filt[i]) {
-            put_bits(&s->pb, 1, !!coef_res);
+            put_bits(&s->pb, 1, 1);
             for (filt = 0; filt < sce->tns.n_filt[i]; filt++) {
                 put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]);
                 put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]);
@@ -95,7 +63,7 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
                                                   sce->tns.order[i][filt]);
                     put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]);
                     put_bits(&s->pb, 1, !!coef_compress);
-                    coef_len = coef_res + 3 - coef_compress;
+                    coef_len = 4 - coef_compress;
                     for (w = 0; w < sce->tns.order[i][filt]; w++)
                         put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]);
                 }
@@ -104,24 +72,25 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
     }
 }
 
-static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
-                              int order, int w, int filt)
+static void process_tns_coeffs(TemporalNoiseShaping *tns, double *coef_raw,
+                               int *order_p, int w, int filt)
 {
-    int i, j;
+    int i, j, order = *order_p;
     int *idx = tns->coef_idx[w][filt];
     float *lpc = tns->coef[w][filt];
-    const int iqfac_p = ((1 << (MAX_LPC_PRECISION-1)) - 0.5)/(M_PI/2.0);
-    const int iqfac_m = ((1 << (MAX_LPC_PRECISION-1)) + 0.5)/(M_PI/2.0);
     float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f};
 
-    /* Quantization */
+    if (!order)
+        return;
+
+    /* Not what the specs say, but it's better */
     for (i = 0; i < order; i++) {
-        idx[i] = ceilf(asin(tns_coefs_raw[i])*((tns_coefs_raw[i] >= 0) ? iqfac_p : iqfac_m));
-        lpc[i] = 2*sin(idx[i]/((idx[i] >= 0) ? iqfac_p : iqfac_m));
+        idx[i] = quant_array_idx(coef_raw[i], tns_tmp2_map_0_4, 16);
+        lpc[i] = tns_tmp2_map_0_4[idx[i]];
     }
 
     /* Trim any coeff less than 0.1f from the end */
-    for (i = order; i > -1; i--) {
+    for (i = order-1; i > -1; i--) {
         lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f;
         if (lpc[i] != 0.0 ) {
             order = i;
@@ -129,9 +98,6 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
         }
     }
 
-    if (!order)
-        return 0;
-
     /* Step up procedure, convert to LPC coeffs */
     out[0] = 1.0f;
     for (i = 1; i <= order; i++) {
@@ -143,35 +109,59 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
         }
         out[i] = lpc[i-1];
     }
+    *order_p = order;
     memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float));
-
-    return order;
 }
 
-static void apply_tns_filter(float *out, float *in, int order, int direction,
-                             float *tns_coefs, int ltp_used, int w, int filt,
-                             int start_i, int len)
+/* Apply TNS filter */
+void ff_aac_apply_tns(SingleChannelElement *sce)
 {
-    int i, j, inc, start = start_i;
-    float tmp[TNS_MAX_ORDER+1];
-    if (direction) {
-        inc = -1;
-        start = (start + len) - 1;
-    } else {
-        inc = 1;
-    }
-    if (!ltp_used) {    /* AR filter */
-        for (i = 0; i < len; i++, start += inc)
-            out[i] = in[start];
-            for (j = 1; j <= FFMIN(i, order); j++)
-                out[i] += tns_coefs[j]*in[start - j*inc];
-    } else {            /* MA filter */
-        for (i = 0; i < len; i++, start += inc) {
-            tmp[0] = out[i] = in[start];
-            for (j = 1; j <= FFMIN(i, order); j++)
-                out[i] += tmp[j]*tns_coefs[j];
-            for (j = order; j > 0; j--)
-                tmp[j] = tmp[j - 1];
+    const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
+    float *coef = sce->pcoeffs;
+    TemporalNoiseShaping *tns = &sce->tns;
+    int w, filt, m, i;
+    int bottom, top, order, start, end, size, inc;
+    float *lpc, tmp[TNS_MAX_ORDER+1];
+
+    return;
+
+    for (w = 0; w < sce->ics.num_windows; w++) {
+        bottom = sce->ics.num_swb;
+        for (filt = 0; filt < tns->n_filt[w]; filt++) {
+            top    = bottom;
+            bottom = FFMAX(0, top - tns->length[w][filt]);
+            order  = tns->order[w][filt];
+            lpc    = tns->coef[w][filt];
+            if (!order)
+                continue;
+
+            start = sce->ics.swb_offset[FFMIN(bottom, mmm)];
+            end   = sce->ics.swb_offset[FFMIN(   top, mmm)];
+            if ((size = end - start) <= 0)
+                continue;
+            if (tns->direction[w][filt]) {
+                inc = -1;
+                start = end - 1;
+            } else {
+                inc = 1;
+            }
+            start += w * 128;
+
+            if (!sce->ics.ltp.present) {
+                // ar filter
+                for (m = 0; m < size; m++, start += inc)
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] += coef[start - i * inc]*lpc[i - 1];
+            } else {
+                // ma filter
+                for (m = 0; m < size; m++, start += inc) {
+                    tmp[0] = coef[start];
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] += tmp[i]*lpc[i - 1];
+                    for (i = order; i > 0; i--)
+                        tmp[i] = tmp[i - 1];
+                }
+            }
         }
     }
 }
@@ -179,57 +169,54 @@ static void apply_tns_filter(float *out, float *in, int order, int direction,
 void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
 {
     TemporalNoiseShaping *tns = &sce->tns;
-    int w, g, order, sfb_start, sfb_len, coef_start, shift[MAX_LPC_ORDER], count = 0;
+    int w, g, w2, prev_end_sfb = 0, count = 0;
     const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
     const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
-    const float freq_mult = mpeg4audio_sample_rates[s->samplerate_index]/(1024.0f/sce->ics.num_windows)/2.0f;
-    float max_coef = 0.0f;
-
-    sce->tns.present = 0;
-    return;
-
-    for (coef_start = 0; coef_start < 1024; coef_start++)
-        max_coef = FFMAX(max_coef, sce->pcoeffs[coef_start]);
 
-    for (w = 0; w < sce->ics.num_windows; w++) {
-        int filters = 1, start = 0, coef_len = 0;
-        int32_t conv_coeff[1024] = {0};
-        int32_t coefs_t[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
-
-        /* Determine start sfb + coef - excludes anything below threshold */
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        int order = 0, filters = 1;
+        int sfb_start = 0, sfb_len = 0;
+        int coef_start = 0, coef_len = 0;
+        float energy = 0.0f, threshold = 0.0f;
+        double coefs[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
         for (g = 0;  g < sce->ics.num_swb; g++) {
-            if (start*freq_mult > TNS_LOW_LIMIT) {
+            if (!sfb_start && w*16+g > TNS_LOW_LIMIT && w*16+g > prev_end_sfb) {
                 sfb_start = w*16+g;
-                sfb_len   = (w+1)*16 + g - sfb_start;
-                coef_start = sce->ics.swb_offset[sfb_start];
-                coef_len  = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
-                break;
+                coef_start =  sce->ics.swb_offset[sfb_start];
+            }
+            if (sfb_start) {
+                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                    if (!sfb_len && band->energy < band->threshold*1.3f) {
+                        sfb_len = (w+w2)*16+g - sfb_start;
+                        prev_end_sfb = sfb_start + sfb_len;
+                        coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
+                        break;
+                    }
+                    energy += band->energy;
+                    threshold += band->threshold;
+                }
+                if (!sfb_len) {
+                    sfb_len = (w+sce->ics.group_len[w])*16+g - sfb_start;
+                    coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
+                }
             }
-            start += sce->ics.swb_sizes[g];
         }
 
-        if (coef_len <= 0)
+        if (sfb_len <= 0 || coef_len <= 0)
             continue;
-
-        conv_to_int32(conv_coeff, &sce->pcoeffs[coef_start], coef_len, max_coef);
+        if (coef_start + coef_len > 1024)
+            coef_len = 1024 - coef_start;
 
         /* LPC */
-        order = ff_lpc_calc_coefs(&s->lpc, conv_coeff, coef_len,
-                                  TNS_MIN_PRED_ORDER, tns_max_order,
-                                  32, coefs_t, shift,
-                                  FF_LPC_TYPE_LEVINSON, 10,
-                                  ORDER_METHOD_EST, MAX_LPC_SHIFT, 0) - 1;
-
-        /* Works surprisingly well, remember to tweak MAX_LPC_SHIFT if you want to play around with this */
-        if (shift[order] > 3) {
+        order = ff_lpc_calc_levinsion(&s->lpc, &sce->coeffs[coef_start], coef_len,
+                                      coefs, 0, tns_max_order, ORDER_METHOD_LOG);
+
+        if (energy > threshold) {
             int direction = 0;
-            float tns_coefs_raw[TNS_MAX_ORDER];
             tns->n_filt[w] = filters++;
-            conv_to_float(tns_coefs_raw, coefs_t[order], order);
             for (g = 0; g < tns->n_filt[w]; g++) {
-                process_tns_coeffs(tns, tns_coefs_raw, order, w, g);
-                apply_tns_filter(&sce->coeffs[coef_start], sce->pcoeffs, order, direction, tns->coef[w][g],
-                                 sce->ics.ltp.present, w, g, coef_start, coef_len);
+                process_tns_coeffs(tns, coefs[order], &order, w, g);
                 tns->order[w][g]     = order;
                 tns->length[w][g]    = sfb_len;
                 tns->direction[w][g] = direction;
diff --git a/libavcodec/aacenc_tns.h b/libavcodec/aacenc_tns.h
index 789e9a64fe..72c91239a3 100644
--- a/libavcodec/aacenc_tns.h
+++ b/libavcodec/aacenc_tns.h
@@ -30,16 +30,11 @@
 
 #include "aacenc.h"
 
-/** Frequency in Hz for lower limit of TNS **/
-#define TNS_LOW_LIMIT 2150
-
-/** LPC settings */
-#define TNS_MIN_PRED_ORDER 0
-#define MAX_LPC_PRECISION  4   /* 4 bits ltp coeff precision */
-#define TNS_LPC_PASSES     2
-#define MAX_LPC_SHIFT      4
+/** Lower limit of TNS in SFBs **/
+#define TNS_LOW_LIMIT 24
 
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
+void ff_aac_apply_tns(SingleChannelElement *sce);
 void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
 
 #endif /* AVCODEC_AACENC_TNS_H */