aacenc_tns: rework the way coefficients are calculated

This commit abandons the way the specifications state to
quantize the coefficients, makes use of the new LPC float
functions and is much better.

The original way of converting non-normalized float samples
to int32_t, which our LPC system expects, was wrong, and it was
wrong to assume the coefficients that are generated are also
valid. It was essentially a full garbage-in, garbage-out
system and it definitely shows when looking at spectrals
and listening. The high frequencies were very overattenuated.
The new LPC function performs the analysis directly.

The specifications state to quantize the coefficients into
four-bit index values using an asin() function, which of course
had to have ugly ternary operators because the function turns
negative if the coefficients are negative, which when encoding
causes an invalid bitstream to be generated.

This deviates from this by using the direct TNS tables, which
are fairly small since you only have 4 bits at most for index
values. The LPC values are directly quantized against the tables
and are then used to perform filtering after the requantization,
which simply fetches the array values.

The end result is that TNS works much better now and doesn't
attenuate anything but the actual signal, i.e. TNS removes
quantization errors and does its job correctly now.

It might be enabled by default soon since it doesn't hurt and
helps reduce nastiness at low bitrates.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
pull/146/head
Rostislav Pehlivanov 9 years ago
parent 1cd5daee20
commit f20b67173c
  1. 4
      libavcodec/aaccoder.c
  2. 5
      libavcodec/aacenc.c
  3. 1
      libavcodec/aacenc.h
  4. 211
      libavcodec/aacenc_tns.c
  5. 11
      libavcodec/aacenc_tns.h

@ -964,6 +964,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
@ -979,6 +980,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
@ -994,6 +996,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,
@ -1009,6 +1012,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred, ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction, ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred, ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors, set_special_band_scalefactors,
search_for_pns, search_for_pns,
ff_aac_search_for_tns, ff_aac_search_for_tns,

@ -404,10 +404,9 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
encode_band_info(s, sce); encode_band_info(s, sce);
encode_scale_factors(avctx, s, sce); encode_scale_factors(avctx, s, sce);
encode_pulses(s, &sce->pulse); encode_pulses(s, &sce->pulse);
put_bits(&s->pb, 1, !!sce->tns.present);
if (s->coder->encode_tns_info) if (s->coder->encode_tns_info)
s->coder->encode_tns_info(s, sce); s->coder->encode_tns_info(s, sce);
else
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, 0); //ssr put_bits(&s->pb, 1, 0); //ssr
encode_spectral_coeffs(s, sce); encode_spectral_coeffs(s, sce);
return 0; return 0;
@ -609,6 +608,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->coder->search_for_pns(s, avctx, sce); s->coder->search_for_pns(s, avctx, sce);
if (s->options.tns && s->coder->search_for_tns) if (s->options.tns && s->coder->search_for_tns)
s->coder->search_for_tns(s, sce); s->coder->search_for_tns(s, sce);
if (s->options.tns && s->coder->apply_tns_filt)
s->coder->apply_tns_filt(sce);
if (sce->tns.present) if (sce->tns.present)
tns_mode = 1; tns_mode = 1;
} }

@ -63,6 +63,7 @@ typedef struct AACCoefficientsEncoder {
void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe); void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*apply_tns_filt)(SingleChannelElement *sce);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce); void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce); void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce); void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);

@ -31,62 +31,30 @@
#include "aacenc_utils.h" #include "aacenc_utils.h"
#include "aacenc_quantization.h" #include "aacenc_quantization.h"
static inline void conv_to_int32(int32_t *loc, float *samples, int num, float norm)
{
int i;
for (i = 0; i < num; i++)
loc[i] = ceilf((samples[i]/norm)*INT32_MAX);
}
static inline void conv_to_float(float *arr, int32_t *cof, int num)
{
int i;
for (i = 0; i < num; i++)
arr[i] = (float)cof[i]/INT32_MAX;
}
/* Input: quantized 4 bit coef, output: 1 if first (MSB) 2 bits are the same */
static inline int coef_test_compression(int coef)
{
int tmp = coef >> 2;
int res = ff_ctz(tmp);
if (res > 1)
return 1; /* ...00 -> compressable */
else if (res == 1)
return 0; /* ...10 -> uncompressable */
else if (ff_ctz(tmp >> 1) > 0)
return 0; /* ...0 1 -> uncompressable */
else
return 1; /* ...1 1 -> compressable */
}
static inline int compress_coef(int *coefs, int num) static inline int compress_coef(int *coefs, int num)
{ {
int i, res = 0; int i, c = 0;
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
res += coef_test_compression(coefs[i]); c += coefs[i] < 4 || coefs[i] > 11;
return res == num ? 1 : 0; return c == num;
} }
/** /**
* Encode TNS data. * Encode TNS data.
* Coefficient compression saves a single bit. * Coefficient compression saves a single bit per coefficient.
*/ */
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce) void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
{ {
int i, w, filt, coef_len, coef_compress; int i, w, filt, coef_len, coef_compress;
const int coef_res = MAX_LPC_PRECISION == 4 ? 1 : 0;
const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
put_bits(&s->pb, 1, !!sce->tns.present);
if (!sce->tns.present) if (!sce->tns.present)
return; return;
for (i = 0; i < sce->ics.num_windows; i++) { for (i = 0; i < sce->ics.num_windows; i++) {
put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]); put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]);
if (sce->tns.n_filt[i]) { if (sce->tns.n_filt[i]) {
put_bits(&s->pb, 1, !!coef_res); put_bits(&s->pb, 1, 1);
for (filt = 0; filt < sce->tns.n_filt[i]; filt++) { for (filt = 0; filt < sce->tns.n_filt[i]; filt++) {
put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]); put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]);
put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]); put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]);
@ -95,7 +63,7 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
sce->tns.order[i][filt]); sce->tns.order[i][filt]);
put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]); put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]);
put_bits(&s->pb, 1, !!coef_compress); put_bits(&s->pb, 1, !!coef_compress);
coef_len = coef_res + 3 - coef_compress; coef_len = 4 - coef_compress;
for (w = 0; w < sce->tns.order[i][filt]; w++) for (w = 0; w < sce->tns.order[i][filt]; w++)
put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]); put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]);
} }
@ -104,24 +72,25 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
} }
} }
static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw, static void process_tns_coeffs(TemporalNoiseShaping *tns, double *coef_raw,
int order, int w, int filt) int *order_p, int w, int filt)
{ {
int i, j; int i, j, order = *order_p;
int *idx = tns->coef_idx[w][filt]; int *idx = tns->coef_idx[w][filt];
float *lpc = tns->coef[w][filt]; float *lpc = tns->coef[w][filt];
const int iqfac_p = ((1 << (MAX_LPC_PRECISION-1)) - 0.5)/(M_PI/2.0);
const int iqfac_m = ((1 << (MAX_LPC_PRECISION-1)) + 0.5)/(M_PI/2.0);
float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f}; float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f};
/* Quantization */ if (!order)
return;
/* Not what the specs say, but it's better */
for (i = 0; i < order; i++) { for (i = 0; i < order; i++) {
idx[i] = ceilf(asin(tns_coefs_raw[i])*((tns_coefs_raw[i] >= 0) ? iqfac_p : iqfac_m)); idx[i] = quant_array_idx(coef_raw[i], tns_tmp2_map_0_4, 16);
lpc[i] = 2*sin(idx[i]/((idx[i] >= 0) ? iqfac_p : iqfac_m)); lpc[i] = tns_tmp2_map_0_4[idx[i]];
} }
/* Trim any coeff less than 0.1f from the end */ /* Trim any coeff less than 0.1f from the end */
for (i = order; i > -1; i--) { for (i = order-1; i > -1; i--) {
lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f; lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f;
if (lpc[i] != 0.0 ) { if (lpc[i] != 0.0 ) {
order = i; order = i;
@ -129,9 +98,6 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
} }
} }
if (!order)
return 0;
/* Step up procedure, convert to LPC coeffs */ /* Step up procedure, convert to LPC coeffs */
out[0] = 1.0f; out[0] = 1.0f;
for (i = 1; i <= order; i++) { for (i = 1; i <= order; i++) {
@ -143,35 +109,59 @@ static int process_tns_coeffs(TemporalNoiseShaping *tns, float *tns_coefs_raw,
} }
out[i] = lpc[i-1]; out[i] = lpc[i-1];
} }
*order_p = order;
memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float)); memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float));
return order;
} }
static void apply_tns_filter(float *out, float *in, int order, int direction, /* Apply TNS filter */
float *tns_coefs, int ltp_used, int w, int filt, void ff_aac_apply_tns(SingleChannelElement *sce)
int start_i, int len)
{ {
int i, j, inc, start = start_i; const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
float tmp[TNS_MAX_ORDER+1]; float *coef = sce->pcoeffs;
if (direction) { TemporalNoiseShaping *tns = &sce->tns;
inc = -1; int w, filt, m, i;
start = (start + len) - 1; int bottom, top, order, start, end, size, inc;
} else { float *lpc, tmp[TNS_MAX_ORDER+1];
inc = 1;
} return;
if (!ltp_used) { /* AR filter */
for (i = 0; i < len; i++, start += inc) for (w = 0; w < sce->ics.num_windows; w++) {
out[i] = in[start]; bottom = sce->ics.num_swb;
for (j = 1; j <= FFMIN(i, order); j++) for (filt = 0; filt < tns->n_filt[w]; filt++) {
out[i] += tns_coefs[j]*in[start - j*inc]; top = bottom;
} else { /* MA filter */ bottom = FFMAX(0, top - tns->length[w][filt]);
for (i = 0; i < len; i++, start += inc) { order = tns->order[w][filt];
tmp[0] = out[i] = in[start]; lpc = tns->coef[w][filt];
for (j = 1; j <= FFMIN(i, order); j++) if (!order)
out[i] += tmp[j]*tns_coefs[j]; continue;
for (j = order; j > 0; j--)
tmp[j] = tmp[j - 1]; start = sce->ics.swb_offset[FFMIN(bottom, mmm)];
end = sce->ics.swb_offset[FFMIN( top, mmm)];
if ((size = end - start) <= 0)
continue;
if (tns->direction[w][filt]) {
inc = -1;
start = end - 1;
} else {
inc = 1;
}
start += w * 128;
if (!sce->ics.ltp.present) {
// ar filter
for (m = 0; m < size; m++, start += inc)
for (i = 1; i <= FFMIN(m, order); i++)
coef[start] += coef[start - i * inc]*lpc[i - 1];
} else {
// ma filter
for (m = 0; m < size; m++, start += inc) {
tmp[0] = coef[start];
for (i = 1; i <= FFMIN(m, order); i++)
coef[start] += tmp[i]*lpc[i - 1];
for (i = order; i > 0; i--)
tmp[i] = tmp[i - 1];
}
}
} }
} }
} }
@ -179,57 +169,54 @@ static void apply_tns_filter(float *out, float *in, int order, int direction,
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
{ {
TemporalNoiseShaping *tns = &sce->tns; TemporalNoiseShaping *tns = &sce->tns;
int w, g, order, sfb_start, sfb_len, coef_start, shift[MAX_LPC_ORDER], count = 0; int w, g, w2, prev_end_sfb = 0, count = 0;
const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER; const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
const float freq_mult = mpeg4audio_sample_rates[s->samplerate_index]/(1024.0f/sce->ics.num_windows)/2.0f;
float max_coef = 0.0f;
sce->tns.present = 0;
return;
for (coef_start = 0; coef_start < 1024; coef_start++)
max_coef = FFMAX(max_coef, sce->pcoeffs[coef_start]);
for (w = 0; w < sce->ics.num_windows; w++) { for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
int filters = 1, start = 0, coef_len = 0; int order = 0, filters = 1;
int32_t conv_coeff[1024] = {0}; int sfb_start = 0, sfb_len = 0;
int32_t coefs_t[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}}; int coef_start = 0, coef_len = 0;
float energy = 0.0f, threshold = 0.0f;
/* Determine start sfb + coef - excludes anything below threshold */ double coefs[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
for (g = 0; g < sce->ics.num_swb; g++) { for (g = 0; g < sce->ics.num_swb; g++) {
if (start*freq_mult > TNS_LOW_LIMIT) { if (!sfb_start && w*16+g > TNS_LOW_LIMIT && w*16+g > prev_end_sfb) {
sfb_start = w*16+g; sfb_start = w*16+g;
sfb_len = (w+1)*16 + g - sfb_start; coef_start = sce->ics.swb_offset[sfb_start];
coef_start = sce->ics.swb_offset[sfb_start]; }
coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start; if (sfb_start) {
break; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
if (!sfb_len && band->energy < band->threshold*1.3f) {
sfb_len = (w+w2)*16+g - sfb_start;
prev_end_sfb = sfb_start + sfb_len;
coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
break;
}
energy += band->energy;
threshold += band->threshold;
}
if (!sfb_len) {
sfb_len = (w+sce->ics.group_len[w])*16+g - sfb_start;
coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
}
} }
start += sce->ics.swb_sizes[g];
} }
if (coef_len <= 0) if (sfb_len <= 0 || coef_len <= 0)
continue; continue;
if (coef_start + coef_len > 1024)
conv_to_int32(conv_coeff, &sce->pcoeffs[coef_start], coef_len, max_coef); coef_len = 1024 - coef_start;
/* LPC */ /* LPC */
order = ff_lpc_calc_coefs(&s->lpc, conv_coeff, coef_len, order = ff_lpc_calc_levinsion(&s->lpc, &sce->coeffs[coef_start], coef_len,
TNS_MIN_PRED_ORDER, tns_max_order, coefs, 0, tns_max_order, ORDER_METHOD_LOG);
32, coefs_t, shift,
FF_LPC_TYPE_LEVINSON, 10, if (energy > threshold) {
ORDER_METHOD_EST, MAX_LPC_SHIFT, 0) - 1;
/* Works surprisingly well, remember to tweak MAX_LPC_SHIFT if you want to play around with this */
if (shift[order] > 3) {
int direction = 0; int direction = 0;
float tns_coefs_raw[TNS_MAX_ORDER];
tns->n_filt[w] = filters++; tns->n_filt[w] = filters++;
conv_to_float(tns_coefs_raw, coefs_t[order], order);
for (g = 0; g < tns->n_filt[w]; g++) { for (g = 0; g < tns->n_filt[w]; g++) {
process_tns_coeffs(tns, tns_coefs_raw, order, w, g); process_tns_coeffs(tns, coefs[order], &order, w, g);
apply_tns_filter(&sce->coeffs[coef_start], sce->pcoeffs, order, direction, tns->coef[w][g],
sce->ics.ltp.present, w, g, coef_start, coef_len);
tns->order[w][g] = order; tns->order[w][g] = order;
tns->length[w][g] = sfb_len; tns->length[w][g] = sfb_len;
tns->direction[w][g] = direction; tns->direction[w][g] = direction;

@ -30,16 +30,11 @@
#include "aacenc.h" #include "aacenc.h"
/** Frequency in Hz for lower limit of TNS **/ /** Lower limit of TNS in SFBs **/
#define TNS_LOW_LIMIT 2150 #define TNS_LOW_LIMIT 24
/** LPC settings */
#define TNS_MIN_PRED_ORDER 0
#define MAX_LPC_PRECISION 4 /* 4 bits ltp coeff precision */
#define TNS_LPC_PASSES 2
#define MAX_LPC_SHIFT 4
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce); void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_apply_tns(SingleChannelElement *sce);
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce); void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
#endif /* AVCODEC_AACENC_TNS_H */ #endif /* AVCODEC_AACENC_TNS_H */

Loading…
Cancel
Save