From 44ddee945a2e8cfc1b7074de6e35595ed41da4e4 Mon Sep 17 00:00:00 2001 From: Rostislav Pehlivanov Date: Sat, 29 Aug 2015 06:34:08 +0100 Subject: [PATCH] aacenc_pred: rework the way prediction is done This commit completely alters the algorithm of prediction. The original commit which introduced prediction was completely incorrect to even remotely care about what the actual coefficients contain or whether any options were enabled. Not my actual fault. This commit treats prediction the way the decoder does and expects to do: like lossy encryption. Everything related to prediction now happens at the very end but just before quantization and encoding of coefficients. On the decoder side, prediction happens before anything has had a chance to even access the coefficients. Also the original implementation had problems because it actually touched the band_type of special bands which already had their scalefactor indices marked and it's a wonder the assertion wasn't triggered when transmitting those. Overall, this now drastically increases audio quality and you should think about enabling it if you don't plan on playing anything encoded on really old low power ultra-embedded devices since they might not support decoding of prediction or AAC-Main. Though the specifications were written ages ago and as times change so do the FLOPS. 
Signed-off-by: Rostislav Pehlivanov --- libavcodec/aac.h | 2 +- libavcodec/aaccoder.c | 4 - libavcodec/aacenc.c | 41 +++-- libavcodec/aacenc.h | 1 - libavcodec/aacenc_pred.c | 361 ++++++++++++++++++--------------------- libavcodec/aacenc_pred.h | 6 +- 6 files changed, 190 insertions(+), 225 deletions(-) diff --git a/libavcodec/aac.h b/libavcodec/aac.h index dc6b439854..aa4b53b549 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -247,7 +247,7 @@ typedef struct SingleChannelElement { TemporalNoiseShaping tns; Pulse pulse; enum BandType band_type[128]; ///< band types - enum BandType orig_band_type[128]; ///< band type backups for undoing prediction + enum BandType band_alt[128]; ///< alternative band type (used by encoder) int band_type_run_end[120]; ///< band type run end points INTFLOAT sf[120]; ///< scalefactors int sf_idx[128]; ///< scalefactor indices (used by encoder) diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index c273c54078..8256f961c0 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -964,7 +964,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_encode_main_pred, ff_aac_adjust_common_prediction, ff_aac_apply_main_pred, - ff_aac_update_main_pred, set_special_band_scalefactors, search_for_pns, ff_aac_search_for_tns, @@ -980,7 +979,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_encode_main_pred, ff_aac_adjust_common_prediction, ff_aac_apply_main_pred, - ff_aac_update_main_pred, set_special_band_scalefactors, search_for_pns, ff_aac_search_for_tns, @@ -996,7 +994,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_encode_main_pred, ff_aac_adjust_common_prediction, ff_aac_apply_main_pred, - ff_aac_update_main_pred, set_special_band_scalefactors, search_for_pns, ff_aac_search_for_tns, @@ -1012,7 +1009,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_encode_main_pred, ff_aac_adjust_common_prediction, ff_aac_apply_main_pred, - ff_aac_update_main_pred, 
set_special_band_scalefactors, search_for_pns, ff_aac_search_for_tns, diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index ece2328f66..a7c43c7411 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -354,15 +354,15 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) start += sce->ics.swb_sizes[i]; continue; } - for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) + for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) { s->coder->quantize_and_encode_band(s, &s->pb, &sce->coeffs[start + w2*128], - &sce->pqcoeffs[start + w2*128], - sce->ics.swb_sizes[i], + NULL, sce->ics.swb_sizes[i], sce->sf_idx[w*16 + i], sce->band_type[w*16 + i], s->lambda, sce->ics.window_clipping[w]); + } start += sce->ics.swb_sizes[i]; } } @@ -609,12 +609,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->coder->search_for_pns(s, avctx, sce); if (s->options.tns && s->coder->search_for_tns) s->coder->search_for_tns(s, sce); - if (s->options.pred && s->coder->search_for_pred) - s->coder->search_for_pred(s, sce); if (sce->tns.present) tns_mode = 1; - if (sce->ics.predictor_present) - pred_mode = 1; } s->cur_channel = start_ch; if (s->options.stereo_mode && cpe->common_window) { @@ -631,15 +627,26 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->coder->search_for_is(s, avctx, cpe); if (cpe->is_mode) is_mode = 1; } - if (s->options.pred && s->coder->adjust_common_prediction) - s->coder->adjust_common_prediction(s, cpe); if (s->coder->set_special_band_scalefactors) for (ch = 0; ch < chans; ch++) s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]); - if (s->options.pred && s->coder->apply_main_pred) - for (ch = 0; ch < chans; ch++) - s->coder->apply_main_pred(s, &cpe->ch[ch]); adjust_frame_information(cpe, chans); + for (ch = 0; ch < chans; ch++) { + sce = &cpe->ch[ch]; + s->cur_channel = start_ch + ch; + if (s->options.pred && s->coder->search_for_pred) + s->coder->search_for_pred(s, sce); + if 
(cpe->ch[ch].ics.predictor_present) pred_mode = 1; + } + if (s->options.pred && s->coder->adjust_common_prediction) + s->coder->adjust_common_prediction(s, cpe); + for (ch = 0; ch < chans; ch++) { + sce = &cpe->ch[ch]; + s->cur_channel = start_ch + ch; + if (s->options.pred && s->coder->apply_main_pred) + s->coder->apply_main_pred(s, sce); + } + s->cur_channel = start_ch; if (chans == 2) { put_bits(&s->pb, 1, cpe->common_window); if (cpe->common_window) { @@ -676,16 +683,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } while (1); - // update predictor state - if (s->options.pred && s->coder->update_main_pred) { - for (i = 0; i < s->chan_map[0]; i++) { - cpe = &s->cpe[i]; - for (ch = 0; ch < chans; ch++) - s->coder->update_main_pred(s, &cpe->ch[ch], - (cpe->common_window && !ch) ? cpe : NULL); - } - } - put_bits(&s->pb, 3, TYPE_END); flush_put_bits(&s->pb); avctx->frame_bits = put_bits_count(&s->pb); diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 9ee854f317..69a8c01d82 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -63,7 +63,6 @@ typedef struct AACCoefficientsEncoder { void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe); void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce); - void (*update_main_pred)(struct AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe); void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce); void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce); void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce); diff --git a/libavcodec/aacenc_pred.c b/libavcodec/aacenc_pred.c index c638b70b18..fafe0029d4 100644 --- a/libavcodec/aacenc_pred.c +++ b/libavcodec/aacenc_pred.c @@ -21,15 +21,22 @@ /** * @file - * AAC encoder main prediction + * AAC encoder Intensity 
Stereo * @author Rostislav Pehlivanov ( atomnuker gmail com ) */ #include "aactab.h" #include "aacenc_pred.h" #include "aacenc_utils.h" +#include "aacenc_is.h" /* <- Needed for common window distortions */ #include "aacenc_quantization.h" +#define RESTORE_PRED(sce, sfb) \ + if (sce->ics.prediction_used[sfb]) {\ + sce->ics.prediction_used[sfb] = 0;\ + sce->band_type[sfb] = sce->band_alt[sfb];\ + } + static inline float flt16_round(float pf) { union av_intfloat32 tmp; @@ -54,73 +61,57 @@ static inline float flt16_trunc(float pf) return pun.f; } -static inline void predict(PredictorState *ps, float *coef, float *rcoef, - int output_enable) +static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set) { - const float a = 0.953125; // 61.0 / 64 float k2; - float r0 = ps->r0, r1 = ps->r1; - float cor0 = ps->cor0, cor1 = ps->cor1; - float var0 = ps->var0, var1 = ps->var1; - - ps->k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0; - k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0; - - ps->x_est = flt16_round(ps->k1*r0 + k2*r1); - - if (output_enable) - *coef -= ps->x_est; - else - *rcoef = *coef - ps->x_est; -} - -static inline void update_predictor(PredictorState *ps, float qcoef) -{ - const float alpha = 0.90625; // 29.0 / 32 const float a = 0.953125; // 61.0 / 64 - float k1 = ps->k1; - float r0 = ps->r0; - float r1 = ps->r1; - float e0 = qcoef + ps->x_est; - float e1 = e0 - k1 * r0; - float cor0 = ps->cor0, cor1 = ps->cor1; - float var0 = ps->var0, var1 = ps->var1; + const float alpha = 0.90625; // 29.0 / 32 + const float k1 = ps->k1; + const float r0 = ps->r0, r1 = ps->r1; + const float cor0 = ps->cor0, cor1 = ps->cor1; + const float var0 = ps->var0, var1 = ps->var1; + const float e0 = *coef - ps->x_est; + const float e1 = e0 - k1 * r0; + + if (set) + *coef = e0; ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1); ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1)); ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0); ps->var0 = 
flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0)); + ps->r1 = flt16_trunc(a * (r0 - k1 * e0)); + ps->r0 = flt16_trunc(a * e0); - ps->r1 = flt16_trunc(a * (r0 - k1 * e0)); - ps->r0 = flt16_trunc(a * e0); + /* Prediction for next frame */ + ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0; + k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0; + *rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1); } static inline void reset_predict_state(PredictorState *ps) { - ps->r0 = 0.0f; - ps->r1 = 0.0f; - ps->cor0 = 0.0f; - ps->cor1 = 0.0f; - ps->var0 = 1.0f; - ps->var1 = 1.0f; - ps->k1 = 0.0f; - ps->x_est= 0.0f; + ps->r0 = 0.0f; + ps->r1 = 0.0f; + ps->k1 = 0.0f; + ps->cor0 = 0.0f; + ps->cor1 = 0.0f; + ps->var0 = 1.0f; + ps->var1 = 1.0f; + ps->x_est = 0.0f; } -static inline void reset_all_predictors(SingleChannelElement *sce) +static inline void reset_all_predictors(PredictorState *ps) { int i; for (i = 0; i < MAX_PREDICTORS; i++) - reset_predict_state(&sce->predictor_state[i]); - for (i = 1; i < 31; i++) - sce->ics.predictor_reset_count[i] = 0; + reset_predict_state(&ps[i]); } static inline void reset_predictor_group(SingleChannelElement *sce, int group_num) { int i; PredictorState *ps = sce->predictor_state; - sce->ics.predictor_reset_count[group_num] = 0; for (i = group_num - 1; i < MAX_PREDICTORS; i += 30) reset_predict_state(&ps[i]); } @@ -128,136 +119,89 @@ static inline void reset_predictor_group(SingleChannelElement *sce, int group_nu void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce) { int sfb, k; + const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { - for (sfb = 0; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) { - for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) + for (sfb = 0; sfb < pmax; sfb++) { + for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) { 
predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], - (sce->ics.predictor_present && sce->ics.prediction_used[sfb])); - } - } -} - -static void decode_joint_stereo(ChannelElement *cpe) -{ - int i, w, w2, g; - SingleChannelElement *sce0 = &cpe->ch[0]; - SingleChannelElement *sce1 = &cpe->ch[1]; - IndividualChannelStream *ics; - - for (i = 0; i < MAX_PREDICTORS; i++) - sce0->prcoeffs[i] = sce0->predictor_state[i].x_est; - - ics = &sce0->ics; - for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { - for (w2 = 0; w2 < ics->group_len[w]; w2++) { - int start = (w+w2) * 128; - for (g = 0; g < ics->num_swb; g++) { - int sfb = w*16 + g; - //apply Intensity stereo coeffs transformation - if (cpe->is_mask[sfb]) { - int p = -1 + 2 * (sce1->band_type[sfb] - 14); - float rscale = ff_aac_pow2sf_tab[-sce1->sf_idx[sfb] + POW_SF2_ZERO]; - p *= 1 - 2 * cpe->ms_mask[sfb]; - for (i = 0; i < ics->swb_sizes[g]; i++) { - sce0->pqcoeffs[start+i] = (sce0->prcoeffs[start+i] + p*sce0->pqcoeffs[start+i]) * rscale; - } - } else if (cpe->ms_mask[sfb] && - sce0->band_type[sfb] < NOISE_BT && - sce1->band_type[sfb] < NOISE_BT) { - for (i = 0; i < ics->swb_sizes[g]; i++) { - float L = sce0->pqcoeffs[start+i] + sce1->pqcoeffs[start+i]; - float R = sce0->pqcoeffs[start+i] - sce1->pqcoeffs[start+i]; - sce0->pqcoeffs[start+i] = L; - sce1->pqcoeffs[start+i] = R; - } - } - start += ics->swb_sizes[g]; + sce->ics.predictor_present && sce->ics.prediction_used[sfb]); } } + if (sce->ics.predictor_reset_group) { + reset_predictor_group(sce, sce->ics.predictor_reset_group); + } + } else { + reset_all_predictors(sce->predictor_state); } } -static inline void prepare_predictors(SingleChannelElement *sce) -{ - int k; - for (k = 0; k < MAX_PREDICTORS; k++) - predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], 0); -} - -void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe) -{ - int k; - - if (sce->ics.window_sequence[0] == 
EIGHT_SHORT_SEQUENCE) - return; - - if (cpe && cpe->common_window) - decode_joint_stereo(cpe); - - for (k = 0; k < MAX_PREDICTORS; k++) - update_predictor(&sce->predictor_state[k], sce->pqcoeffs[k]); - - if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - reset_all_predictors(sce); - } - - if (sce->ics.predictor_reset_group) - reset_predictor_group(sce, sce->ics.predictor_reset_group); -} - -/* If inc == 0 check if it returns 0 to see if you can reset freely */ +/* If inc = 0 you can check if this returns 0 to see if you can reset freely */ static inline int update_counters(IndividualChannelStream *ics, int inc) { - int i, rg = 0; + int i; for (i = 1; i < 31; i++) { ics->predictor_reset_count[i] += inc; - if (!rg && ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN) - rg = i; /* Reset this immediately */ + if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN) + return i; /* Reset this immediately */ } - return rg; + return 0; } void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe) { - int start, w, g, count = 0; + int start, w, w2, g, i, count = 0; SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; + const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); + const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); + const int pmax = FFMIN(pmax0, pmax1); - if (!cpe->common_window || sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) + if (!cpe->common_window || + sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE || + sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) return; - /* Predict if IS or MS is on and at least one channel is marked or when both are */ for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { int sfb = w*16+g; - if (sfb < PRED_SFB_START || sfb > ff_aac_pred_sfb_max[s->samplerate_index]) { - ; - } else if ((cpe->is_mask[sfb] || 
cpe->ms_mask[sfb]) && - (sce0->ics.prediction_used[sfb] || sce1->ics.prediction_used[sfb])) { - sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 1; - count++; - } else if (sce0->ics.prediction_used[sfb] && sce1->ics.prediction_used[sfb]) { + int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb]; + float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; + struct AACISError ph_err1, ph_err2, *erf; + if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) { + RESTORE_PRED(sce0, sfb); + RESTORE_PRED(sce1, sfb); + start += sce0->ics.swb_sizes[g]; + continue; + } + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + float coef0 = sce0->pcoeffs[start+(w+w2)*128+i]; + float coef1 = sce1->pcoeffs[start+(w+w2)*128+i]; + ener0 += coef0*coef0; + ener1 += coef1*coef1; + ener01 += (coef0 + coef1)*(coef0 + coef1); + } + } + ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g, + ener0, ener1, ener01, -1); + ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g, + ener0, ener1, ener01, +1); + erf = ph_err1.error < ph_err2.error ? 
&ph_err1 : &ph_err2; + if (erf->pass) { + sce0->ics.prediction_used[sfb] = 1; + sce1->ics.prediction_used[sfb] = 1; count++; } else { - /* Restore band types, if changed - prediction never sets > RESERVED_BT */ - if (sce0->ics.prediction_used[sfb] && sce0->band_type[sfb] < RESERVED_BT) - sce0->band_type[sfb] = sce0->orig_band_type[sfb]; - if (sce1->ics.prediction_used[sfb] && sce1->band_type[sfb] < RESERVED_BT) - sce1->band_type[sfb] = sce1->orig_band_type[sfb]; - sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 0; + RESTORE_PRED(sce0, sfb); + RESTORE_PRED(sce1, sfb); } start += sce0->ics.swb_sizes[g]; } } sce1->ics.predictor_present = sce0->ics.predictor_present = !!count; - - if (!count) - return; - - sce1->ics.predictor_reset_group = sce0->ics.predictor_reset_group; } static void update_pred_resets(SingleChannelElement *sce) @@ -266,14 +210,12 @@ static void update_pred_resets(SingleChannelElement *sce) float avg_frame = 0.0f; IndividualChannelStream *ics = &sce->ics; - /* Some other code probably chose the reset group */ - if (ics->predictor_reset_group) - return; - + /* Update the counters and immediately update any frame behind schedule */ if ((ics->predictor_reset_group = update_counters(&sce->ics, 1))) return; for (i = 1; i < 31; i++) { + /* Count-based */ if (ics->predictor_reset_count[i] > max_frame) { max_group_id_c = i; max_frame = ics->predictor_reset_count[i]; @@ -281,8 +223,7 @@ static void update_pred_resets(SingleChannelElement *sce) avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2; } - if (avg_frame*2 > max_frame && max_frame > PRED_RESET_MIN || - max_frame > (2*PRED_RESET_MIN)/3) { + if (max_frame > PRED_RESET_MIN) { ics->predictor_reset_group = max_group_id_c; } else { ics->predictor_reset_group = 0; @@ -291,56 +232,91 @@ static void update_pred_resets(SingleChannelElement *sce) void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) { - int sfb, i, count = 0; - float *O34 = &s->scoefs[256*0], *P34 = 
&s->scoefs[256*1]; - int cost_coeffs = PRICE_OFFSET; - int cost_pred = 1+(sce->ics.predictor_reset_group ? 5 : 0) + - FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); + int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0; + const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); + float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1]; + float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3]; + float *QERR = &s->scoefs[128*4]; - memcpy(sce->orig_band_type, sce->band_type, 128*sizeof(enum BandType)); + if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) { + sce->ics.predictor_present = 0; + return; + } - if (!sce->ics.predictor_initialized || - sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - reset_all_predictors(sce); + if (!sce->ics.predictor_initialized) { + reset_all_predictors(sce->predictor_state); + sce->ics.predictor_initialized = 1; + memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float)); for (i = 1; i < 31; i++) sce->ics.predictor_reset_count[i] = i; - sce->ics.predictor_initialized = 1; } update_pred_resets(sce); - prepare_predictors(sce); - sce->ics.predictor_reset_group = 0; - - for (sfb = PRED_SFB_START; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) { - float dist1 = 0.0f, dist2 = 0.0f; - int swb_start = sce->ics.swb_offset[sfb]; - int swb_len = sce->ics.swb_offset[sfb + 1] - swb_start; - int cb1 = sce->band_type[sfb], cb2, bits1 = 0, bits2 = 0; - FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb]; - abs_pow34_v(O34, &sce->coeffs[swb_start], swb_len); - abs_pow34_v(P34, &sce->prcoeffs[swb_start], swb_len); - cb2 = find_min_book(find_max_val(1, swb_len, P34), sce->sf_idx[sfb]); - if (cb2 <= cb1) { - dist1 += quantize_band_cost(s, &sce->coeffs[swb_start], O34, swb_len, - sce->sf_idx[sfb], cb1, s->lambda / band->threshold, - INFINITY, &bits1, 0); - dist2 += quantize_band_cost(s, &sce->prcoeffs[swb_start], P34, swb_len, - sce->sf_idx[sfb], cb2, s->lambda / band->threshold, 
- INFINITY, &bits2, 0); - if (dist2 <= dist1) { - sce->ics.prediction_used[sfb] = 1; - sce->band_type[sfb] = cb2; - count++; - } - cost_coeffs += bits1; - cost_pred += bits2; + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); + + for (sfb = PRED_SFB_START; sfb < pmax; sfb++) { + int cost1, cost2, cb_p; + float dist1, dist2, dist_spec_err = 0.0f; + const int cb_n = sce->band_type[sfb]; + const int start_coef = sce->ics.swb_offset[sfb]; + const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef; + const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb]; + + if (start_coef + num_coeffs > MAX_PREDICTORS) + continue; + + /* Normal coefficients */ + abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs); + dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL, + O34, num_coeffs, sce->sf_idx[sfb], + cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0); + cost_coeffs += cost1; + + /* Encoded coefficients - needed for #bits, band type and quant. error */ + for (i = 0; i < num_coeffs; i++) + SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i]; + abs_pow34_v(S34, SENT, num_coeffs); + if (cb_n < RESERVED_BT) + cb_p = find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]); + else + cb_p = cb_n; + quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs, + sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY, + &cost2, 0); + + /* Reconstructed coefficients - needed for distortion measurements */ + for (i = 0; i < num_coeffs; i++) + sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? 
(sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f; + abs_pow34_v(P34, &sce->prcoeffs[start_coef], num_coeffs); + if (cb_n < RESERVED_BT) + cb_p = find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]); + else + cb_p = cb_n; + dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL, + P34, num_coeffs, sce->sf_idx[sfb], + cb_p, s->lambda / band->threshold, INFINITY, NULL, 0); + for (i = 0; i < num_coeffs; i++) + dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]); + dist_spec_err *= s->lambda / band->threshold; + dist2 += dist_spec_err; + + if (dist2 <= dist1 && cb_p <= cb_n) { + cost_pred += cost2; + sce->ics.prediction_used[sfb] = 1; + sce->band_alt[sfb] = cb_n; + sce->band_type[sfb] = cb_p; + count++; + } else { + cost_pred += cost1; + sce->band_alt[sfb] = cb_p; } } - if (count && cost_pred > cost_coeffs) { - memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used)); - memcpy(sce->band_type, sce->orig_band_type, sizeof(sce->band_type)); + if (count && cost_coeffs < cost_pred) { count = 0; + for (sfb = PRED_SFB_START; sfb < pmax; sfb++) + RESTORE_PRED(sce, sfb); + memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used)); } sce->ics.predictor_present = !!count; @@ -352,14 +328,15 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce) { int sfb; + IndividualChannelStream *ics = &sce->ics; + const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); - if (!sce->ics.predictor_present || - sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) + if (!ics->predictor_present) return; - put_bits(&s->pb, 1, !!sce->ics.predictor_reset_group); - if (sce->ics.predictor_reset_group) - put_bits(&s->pb, 5, sce->ics.predictor_reset_group); - for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); sfb++) - put_bits(&s->pb, 1, sce->ics.prediction_used[sfb]); + put_bits(&s->pb, 1, 
!!ics->predictor_reset_group); + if (ics->predictor_reset_group) + put_bits(&s->pb, 5, ics->predictor_reset_group); + for (sfb = 0; sfb < pmax; sfb++) + put_bits(&s->pb, 1, ics->prediction_used[sfb]); } diff --git a/libavcodec/aacenc_pred.h b/libavcodec/aacenc_pred.h index 0694c8a56c..999af869f5 100644 --- a/libavcodec/aacenc_pred.h +++ b/libavcodec/aacenc_pred.h @@ -34,16 +34,12 @@ #define PRED_RESET_FRAME_MIN 240 /* Any frame with less than this amount of frames since last reset is ok */ -#define PRED_RESET_MIN 128 +#define PRED_RESET_MIN 64 /* Raise to filter any low frequency artifacts due to prediction */ #define PRED_SFB_START 10 -/* Offset for the number of bits to encode normal coefficients */ -#define PRICE_OFFSET 440 - void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce); -void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe); void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe); void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce); void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce);