aacenc_pred: rework the way prediction is done

This commit completely alters the algorithm of prediction.
The original commit which introduced prediction was completely
incorrect to even remotely care about what the actual coefficients
contain or whether any options were enabled. Not my actual fault.

This commit treats prediction the way the decoder does and expects
to do: like lossy encryption. Everything related to prediction now
happens at the very end but just before quantization and encoding
of coefficients. On the decoder side, prediction happens before
anything has had a chance to even access the coefficients.

Also the original implementation had problems because it actually
touched the band_type of special bands which already had their
scalefactor indices marked and it's a wonder the assertion wasn't
triggered when transmitting those.

Overall, this now drastically increases audio quality and you should
think about enabling it if you don't plan on playing anything encoded
on really old low power ultra-embedded devices since they might not
support decoding of prediction or AAC-Main. Though the specifications
were written ages ago and as times change so do the FLOPS.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
pull/146/head
Rostislav Pehlivanov 9 years ago
parent 949a4892fa
commit 44ddee945a
  1. 2
      libavcodec/aac.h
  2. 4
      libavcodec/aaccoder.c
  3. 41
      libavcodec/aacenc.c
  4. 1
      libavcodec/aacenc.h
  5. 361
      libavcodec/aacenc_pred.c
  6. 6
      libavcodec/aacenc_pred.h

@ -247,7 +247,7 @@ typedef struct SingleChannelElement {
TemporalNoiseShaping tns;
Pulse pulse;
enum BandType band_type[128]; ///< band types
enum BandType orig_band_type[128]; ///< band type backups for undoing prediction
enum BandType band_alt[128]; ///< alternative band type (used by encoder)
int band_type_run_end[120]; ///< band type run end points
INTFLOAT sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)

@ -964,7 +964,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred,
ff_aac_update_main_pred,
set_special_band_scalefactors,
search_for_pns,
ff_aac_search_for_tns,
@ -980,7 +979,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred,
ff_aac_update_main_pred,
set_special_band_scalefactors,
search_for_pns,
ff_aac_search_for_tns,
@ -996,7 +994,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred,
ff_aac_update_main_pred,
set_special_band_scalefactors,
search_for_pns,
ff_aac_search_for_tns,
@ -1012,7 +1009,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_encode_main_pred,
ff_aac_adjust_common_prediction,
ff_aac_apply_main_pred,
ff_aac_update_main_pred,
set_special_band_scalefactors,
search_for_pns,
ff_aac_search_for_tns,

@ -354,15 +354,15 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
start += sce->ics.swb_sizes[i];
continue;
}
for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
s->coder->quantize_and_encode_band(s, &s->pb,
&sce->coeffs[start + w2*128],
&sce->pqcoeffs[start + w2*128],
sce->ics.swb_sizes[i],
NULL, sce->ics.swb_sizes[i],
sce->sf_idx[w*16 + i],
sce->band_type[w*16 + i],
s->lambda,
sce->ics.window_clipping[w]);
}
start += sce->ics.swb_sizes[i];
}
}
@ -609,12 +609,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->coder->search_for_pns(s, avctx, sce);
if (s->options.tns && s->coder->search_for_tns)
s->coder->search_for_tns(s, sce);
if (s->options.pred && s->coder->search_for_pred)
s->coder->search_for_pred(s, sce);
if (sce->tns.present)
tns_mode = 1;
if (sce->ics.predictor_present)
pred_mode = 1;
}
s->cur_channel = start_ch;
if (s->options.stereo_mode && cpe->common_window) {
@ -631,15 +627,26 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->coder->search_for_is(s, avctx, cpe);
if (cpe->is_mode) is_mode = 1;
}
if (s->options.pred && s->coder->adjust_common_prediction)
s->coder->adjust_common_prediction(s, cpe);
if (s->coder->set_special_band_scalefactors)
for (ch = 0; ch < chans; ch++)
s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
if (s->options.pred && s->coder->apply_main_pred)
for (ch = 0; ch < chans; ch++)
s->coder->apply_main_pred(s, &cpe->ch[ch]);
adjust_frame_information(cpe, chans);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->search_for_pred)
s->coder->search_for_pred(s, sce);
if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
}
if (s->options.pred && s->coder->adjust_common_prediction)
s->coder->adjust_common_prediction(s, cpe);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->apply_main_pred)
s->coder->apply_main_pred(s, sce);
}
s->cur_channel = start_ch;
if (chans == 2) {
put_bits(&s->pb, 1, cpe->common_window);
if (cpe->common_window) {
@ -676,16 +683,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
} while (1);
// update predictor state
if (s->options.pred && s->coder->update_main_pred) {
for (i = 0; i < s->chan_map[0]; i++) {
cpe = &s->cpe[i];
for (ch = 0; ch < chans; ch++)
s->coder->update_main_pred(s, &cpe->ch[ch],
(cpe->common_window && !ch) ? cpe : NULL);
}
}
put_bits(&s->pb, 3, TYPE_END);
flush_put_bits(&s->pb);
avctx->frame_bits = put_bits_count(&s->pb);

@ -63,7 +63,6 @@ typedef struct AACCoefficientsEncoder {
void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*update_main_pred)(struct AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);

@ -21,15 +21,22 @@
/**
* @file
* AAC encoder main prediction
* AAC encoder Intensity Stereo
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#include "aactab.h"
#include "aacenc_pred.h"
#include "aacenc_utils.h"
#include "aacenc_is.h" /* <- Needed for common window distortions */
#include "aacenc_quantization.h"
/*
 * Undo the prediction decision for scalefactor band `sfb` of element `sce`.
 *
 * If ics.prediction_used[sfb] is set, the flag is cleared and band_alt[sfb]
 * is copied back into band_type[sfb]. Bands whose flag is not set are left
 * untouched.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement and can be used as the unbraced body of an if/else or a loop.
 * Both arguments are parenthesized, but each is evaluated more than once,
 * so pass expressions without side effects.
 */
#define RESTORE_PRED(sce, sfb) \
    do {\
        if ((sce)->ics.prediction_used[(sfb)]) {\
            (sce)->ics.prediction_used[(sfb)] = 0;\
            (sce)->band_type[(sfb)] = (sce)->band_alt[(sfb)];\
        }\
    } while (0)
static inline float flt16_round(float pf)
{
union av_intfloat32 tmp;
@ -54,73 +61,57 @@ static inline float flt16_trunc(float pf)
return pun.f;
}
static inline void predict(PredictorState *ps, float *coef, float *rcoef,
int output_enable)
static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
{
const float a = 0.953125; // 61.0 / 64
float k2;
float r0 = ps->r0, r1 = ps->r1;
float cor0 = ps->cor0, cor1 = ps->cor1;
float var0 = ps->var0, var1 = ps->var1;
ps->k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
ps->x_est = flt16_round(ps->k1*r0 + k2*r1);
if (output_enable)
*coef -= ps->x_est;
else
*rcoef = *coef - ps->x_est;
}
static inline void update_predictor(PredictorState *ps, float qcoef)
{
const float alpha = 0.90625; // 29.0 / 32
const float a = 0.953125; // 61.0 / 64
float k1 = ps->k1;
float r0 = ps->r0;
float r1 = ps->r1;
float e0 = qcoef + ps->x_est;
float e1 = e0 - k1 * r0;
float cor0 = ps->cor0, cor1 = ps->cor1;
float var0 = ps->var0, var1 = ps->var1;
const float alpha = 0.90625; // 29.0 / 32
const float k1 = ps->k1;
const float r0 = ps->r0, r1 = ps->r1;
const float cor0 = ps->cor0, cor1 = ps->cor1;
const float var0 = ps->var0, var1 = ps->var1;
const float e0 = *coef - ps->x_est;
const float e1 = e0 - k1 * r0;
if (set)
*coef = e0;
ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
ps->r0 = flt16_trunc(a * e0);
ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
ps->r0 = flt16_trunc(a * e0);
/* Prediction for next frame */
ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
*rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
}
/*
 * Put a single predictor back into its initial state.
 *
 * r0, r1, k1, cor0, cor1 and x_est are cleared to 0.0f; var0 and var1 are
 * set to 1.0f. Each field is written exactly once.
 *
 * @param ps predictor state to reinitialize; must not be NULL.
 */
static inline void reset_predict_state(PredictorState *ps)
{
    ps->r0    = 0.0f;
    ps->r1    = 0.0f;
    ps->k1    = 0.0f;
    ps->cor0  = 0.0f;
    ps->cor1  = 0.0f;
    ps->var0  = 1.0f;
    ps->var1  = 1.0f;
    ps->x_est = 0.0f;
}
static inline void reset_all_predictors(SingleChannelElement *sce)
static inline void reset_all_predictors(PredictorState *ps)
{
int i;
for (i = 0; i < MAX_PREDICTORS; i++)
reset_predict_state(&sce->predictor_state[i]);
for (i = 1; i < 31; i++)
sce->ics.predictor_reset_count[i] = 0;
reset_predict_state(&ps[i]);
}
/*
 * Reset one predictor reset group of a channel.
 *
 * Zeroes sce->ics.predictor_reset_count[group_num], then calls
 * reset_predict_state() on every 30th entry of sce->predictor_state,
 * starting at index group_num - 1 and stopping before MAX_PREDICTORS.
 *
 * NOTE(review): group_num == 0 would start the walk at index -1. The callers
 * visible here only invoke this when predictor_reset_group is non-zero;
 * confirm that holds for any other caller.
 */
static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
{
int i;
PredictorState *ps = sce->predictor_state;
sce->ics.predictor_reset_count[group_num] = 0;
/* Stride of 30: visits group_num - 1, group_num + 29, ... */
for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
reset_predict_state(&ps[i]);
}
@ -128,136 +119,89 @@ static inline void reset_predictor_group(SingleChannelElement *sce, int group_nu
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
{
int sfb, k;
const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
for (sfb = 0; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++)
for (sfb = 0; sfb < pmax; sfb++) {
for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
(sce->ics.predictor_present && sce->ics.prediction_used[sfb]));
}
}
}
static void decode_joint_stereo(ChannelElement *cpe)
{
int i, w, w2, g;
SingleChannelElement *sce0 = &cpe->ch[0];
SingleChannelElement *sce1 = &cpe->ch[1];
IndividualChannelStream *ics;
for (i = 0; i < MAX_PREDICTORS; i++)
sce0->prcoeffs[i] = sce0->predictor_state[i].x_est;
ics = &sce0->ics;
for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
for (w2 = 0; w2 < ics->group_len[w]; w2++) {
int start = (w+w2) * 128;
for (g = 0; g < ics->num_swb; g++) {
int sfb = w*16 + g;
//apply Intensity stereo coeffs transformation
if (cpe->is_mask[sfb]) {
int p = -1 + 2 * (sce1->band_type[sfb] - 14);
float rscale = ff_aac_pow2sf_tab[-sce1->sf_idx[sfb] + POW_SF2_ZERO];
p *= 1 - 2 * cpe->ms_mask[sfb];
for (i = 0; i < ics->swb_sizes[g]; i++) {
sce0->pqcoeffs[start+i] = (sce0->prcoeffs[start+i] + p*sce0->pqcoeffs[start+i]) * rscale;
}
} else if (cpe->ms_mask[sfb] &&
sce0->band_type[sfb] < NOISE_BT &&
sce1->band_type[sfb] < NOISE_BT) {
for (i = 0; i < ics->swb_sizes[g]; i++) {
float L = sce0->pqcoeffs[start+i] + sce1->pqcoeffs[start+i];
float R = sce0->pqcoeffs[start+i] - sce1->pqcoeffs[start+i];
sce0->pqcoeffs[start+i] = L;
sce1->pqcoeffs[start+i] = R;
}
}
start += ics->swb_sizes[g];
sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
}
}
if (sce->ics.predictor_reset_group) {
reset_predictor_group(sce, sce->ics.predictor_reset_group);
}
} else {
reset_all_predictors(sce->predictor_state);
}
}
static inline void prepare_predictors(SingleChannelElement *sce)
{
int k;
for (k = 0; k < MAX_PREDICTORS; k++)
predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], 0);
}
void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe)
{
int k;
if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
return;
if (cpe && cpe->common_window)
decode_joint_stereo(cpe);
for (k = 0; k < MAX_PREDICTORS; k++)
update_predictor(&sce->predictor_state[k], sce->pqcoeffs[k]);
if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
reset_all_predictors(sce);
}
if (sce->ics.predictor_reset_group)
reset_predictor_group(sce, sce->ics.predictor_reset_group);
}
/* If inc == 0 check if it returns 0 to see if you can reset freely */
/* If inc = 0 you can check if this returns 0 to see if you can reset freely */
static inline int update_counters(IndividualChannelStream *ics, int inc)
{
int i, rg = 0;
int i;
for (i = 1; i < 31; i++) {
ics->predictor_reset_count[i] += inc;
if (!rg && ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
rg = i; /* Reset this immediately */
if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
return i; /* Reset this immediately */
}
return rg;
return 0;
}
void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe)
{
int start, w, g, count = 0;
int start, w, w2, g, i, count = 0;
SingleChannelElement *sce0 = &cpe->ch[0];
SingleChannelElement *sce1 = &cpe->ch[1];
const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
const int pmax = FFMIN(pmax0, pmax1);
if (!cpe->common_window || sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
if (!cpe->common_window ||
sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
return;
/* Predict if IS or MS is on and at least one channel is marked or when both are */
for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) {
int sfb = w*16+g;
if (sfb < PRED_SFB_START || sfb > ff_aac_pred_sfb_max[s->samplerate_index]) {
;
} else if ((cpe->is_mask[sfb] || cpe->ms_mask[sfb]) &&
(sce0->ics.prediction_used[sfb] || sce1->ics.prediction_used[sfb])) {
sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 1;
count++;
} else if (sce0->ics.prediction_used[sfb] && sce1->ics.prediction_used[sfb]) {
int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
struct AACISError ph_err1, ph_err2, *erf;
if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
RESTORE_PRED(sce0, sfb);
RESTORE_PRED(sce1, sfb);
start += sce0->ics.swb_sizes[g];
continue;
}
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
ener0 += coef0*coef0;
ener1 += coef1*coef1;
ener01 += (coef0 + coef1)*(coef0 + coef1);
}
}
ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
ener0, ener1, ener01, -1);
ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
ener0, ener1, ener01, +1);
erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
if (erf->pass) {
sce0->ics.prediction_used[sfb] = 1;
sce1->ics.prediction_used[sfb] = 1;
count++;
} else {
/* Restore band types, if changed - prediction never sets > RESERVED_BT */
if (sce0->ics.prediction_used[sfb] && sce0->band_type[sfb] < RESERVED_BT)
sce0->band_type[sfb] = sce0->orig_band_type[sfb];
if (sce1->ics.prediction_used[sfb] && sce1->band_type[sfb] < RESERVED_BT)
sce1->band_type[sfb] = sce1->orig_band_type[sfb];
sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 0;
RESTORE_PRED(sce0, sfb);
RESTORE_PRED(sce1, sfb);
}
start += sce0->ics.swb_sizes[g];
}
}
sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
if (!count)
return;
sce1->ics.predictor_reset_group = sce0->ics.predictor_reset_group;
}
static void update_pred_resets(SingleChannelElement *sce)
@ -266,14 +210,12 @@ static void update_pred_resets(SingleChannelElement *sce)
float avg_frame = 0.0f;
IndividualChannelStream *ics = &sce->ics;
/* Some other code probably chose the reset group */
if (ics->predictor_reset_group)
return;
/* Update the counters and immediately update any frame behind schedule */
if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
return;
for (i = 1; i < 31; i++) {
/* Count-based */
if (ics->predictor_reset_count[i] > max_frame) {
max_group_id_c = i;
max_frame = ics->predictor_reset_count[i];
@ -281,8 +223,7 @@ static void update_pred_resets(SingleChannelElement *sce)
avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
}
if (avg_frame*2 > max_frame && max_frame > PRED_RESET_MIN ||
max_frame > (2*PRED_RESET_MIN)/3) {
if (max_frame > PRED_RESET_MIN) {
ics->predictor_reset_group = max_group_id_c;
} else {
ics->predictor_reset_group = 0;
@ -291,56 +232,91 @@ static void update_pred_resets(SingleChannelElement *sce)
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
{
int sfb, i, count = 0;
float *O34 = &s->scoefs[256*0], *P34 = &s->scoefs[256*1];
int cost_coeffs = PRICE_OFFSET;
int cost_pred = 1+(sce->ics.predictor_reset_group ? 5 : 0) +
FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
float *QERR = &s->scoefs[128*4];
memcpy(sce->orig_band_type, sce->band_type, 128*sizeof(enum BandType));
if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
sce->ics.predictor_present = 0;
return;
}
if (!sce->ics.predictor_initialized ||
sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
reset_all_predictors(sce);
if (!sce->ics.predictor_initialized) {
reset_all_predictors(sce->predictor_state);
sce->ics.predictor_initialized = 1;
memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
for (i = 1; i < 31; i++)
sce->ics.predictor_reset_count[i] = i;
sce->ics.predictor_initialized = 1;
}
update_pred_resets(sce);
prepare_predictors(sce);
sce->ics.predictor_reset_group = 0;
for (sfb = PRED_SFB_START; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
float dist1 = 0.0f, dist2 = 0.0f;
int swb_start = sce->ics.swb_offset[sfb];
int swb_len = sce->ics.swb_offset[sfb + 1] - swb_start;
int cb1 = sce->band_type[sfb], cb2, bits1 = 0, bits2 = 0;
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
abs_pow34_v(O34, &sce->coeffs[swb_start], swb_len);
abs_pow34_v(P34, &sce->prcoeffs[swb_start], swb_len);
cb2 = find_min_book(find_max_val(1, swb_len, P34), sce->sf_idx[sfb]);
if (cb2 <= cb1) {
dist1 += quantize_band_cost(s, &sce->coeffs[swb_start], O34, swb_len,
sce->sf_idx[sfb], cb1, s->lambda / band->threshold,
INFINITY, &bits1, 0);
dist2 += quantize_band_cost(s, &sce->prcoeffs[swb_start], P34, swb_len,
sce->sf_idx[sfb], cb2, s->lambda / band->threshold,
INFINITY, &bits2, 0);
if (dist2 <= dist1) {
sce->ics.prediction_used[sfb] = 1;
sce->band_type[sfb] = cb2;
count++;
}
cost_coeffs += bits1;
cost_pred += bits2;
memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
int cost1, cost2, cb_p;
float dist1, dist2, dist_spec_err = 0.0f;
const int cb_n = sce->band_type[sfb];
const int start_coef = sce->ics.swb_offset[sfb];
const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
if (start_coef + num_coeffs > MAX_PREDICTORS)
continue;
/* Normal coefficients */
abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
O34, num_coeffs, sce->sf_idx[sfb],
cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0);
cost_coeffs += cost1;
/* Encoded coefficients - needed for #bits, band type and quant. error */
for (i = 0; i < num_coeffs; i++)
SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
abs_pow34_v(S34, SENT, num_coeffs);
if (cb_n < RESERVED_BT)
cb_p = find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]);
else
cb_p = cb_n;
quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
&cost2, 0);
/* Reconstructed coefficients - needed for distortion measurements */
for (i = 0; i < num_coeffs; i++)
sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
abs_pow34_v(P34, &sce->prcoeffs[start_coef], num_coeffs);
if (cb_n < RESERVED_BT)
cb_p = find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]);
else
cb_p = cb_n;
dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
P34, num_coeffs, sce->sf_idx[sfb],
cb_p, s->lambda / band->threshold, INFINITY, NULL, 0);
for (i = 0; i < num_coeffs; i++)
dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
dist_spec_err *= s->lambda / band->threshold;
dist2 += dist_spec_err;
if (dist2 <= dist1 && cb_p <= cb_n) {
cost_pred += cost2;
sce->ics.prediction_used[sfb] = 1;
sce->band_alt[sfb] = cb_n;
sce->band_type[sfb] = cb_p;
count++;
} else {
cost_pred += cost1;
sce->band_alt[sfb] = cb_p;
}
}
if (count && cost_pred > cost_coeffs) {
memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
memcpy(sce->band_type, sce->orig_band_type, sizeof(sce->band_type));
if (count && cost_coeffs < cost_pred) {
count = 0;
for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
RESTORE_PRED(sce, sfb);
memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
}
sce->ics.predictor_present = !!count;
@ -352,14 +328,15 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
{
int sfb;
IndividualChannelStream *ics = &sce->ics;
const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
if (!sce->ics.predictor_present ||
sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
if (!ics->predictor_present)
return;
put_bits(&s->pb, 1, !!sce->ics.predictor_reset_group);
if (sce->ics.predictor_reset_group)
put_bits(&s->pb, 5, sce->ics.predictor_reset_group);
for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); sfb++)
put_bits(&s->pb, 1, sce->ics.prediction_used[sfb]);
put_bits(&s->pb, 1, !!ics->predictor_reset_group);
if (ics->predictor_reset_group)
put_bits(&s->pb, 5, ics->predictor_reset_group);
for (sfb = 0; sfb < pmax; sfb++)
put_bits(&s->pb, 1, ics->prediction_used[sfb]);
}

@ -34,16 +34,12 @@
#define PRED_RESET_FRAME_MIN 240
/* Any frame with less than this amount of frames since last reset is ok */
#define PRED_RESET_MIN 128
#define PRED_RESET_MIN 64
/* Raise to filter any low frequency artifacts due to prediction */
#define PRED_SFB_START 10
/* Offset for the number of bits to encode normal coefficients */
#define PRICE_OFFSET 440
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe);
void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe);
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce);

Loading…
Cancel
Save