mirror of https://github.com/FFmpeg/FFmpeg.git
Originally committed as revision 19375 to svn://svn.ffmpeg.org/ffmpeg/trunk
release/0.6
parent
5e039e1b4c
commit
78e65cd772
10 changed files with 1992 additions and 115 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@ |
|||||||
|
/*
|
||||||
|
* AAC encoder |
||||||
|
* Copyright (C) 2008 Konstantin Shishkov |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_AACENC_H |
||||||
|
#define AVCODEC_AACENC_H |
||||||
|
|
||||||
|
#include "avcodec.h" |
||||||
|
#include "put_bits.h" |
||||||
|
#include "dsputil.h" |
||||||
|
|
||||||
|
#include "aac.h" |
||||||
|
|
||||||
|
#include "psymodel.h" |
||||||
|
|
||||||
|
struct AACEncContext; |
||||||
|
|
||||||
|
typedef struct AACCoefficientsEncoder{ |
||||||
|
void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, |
||||||
|
SingleChannelElement *sce, const float lambda); |
||||||
|
void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, |
||||||
|
int win, int group_len, const float lambda); |
||||||
|
void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size, |
||||||
|
int scale_idx, int cb, const float lambda); |
||||||
|
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda); |
||||||
|
}AACCoefficientsEncoder; |
||||||
|
|
||||||
|
extern AACCoefficientsEncoder ff_aac_coders[]; |
||||||
|
|
||||||
|
/**
|
||||||
|
* AAC encoder context |
||||||
|
*/ |
||||||
|
typedef struct AACEncContext { |
||||||
|
PutBitContext pb; |
||||||
|
MDCTContext mdct1024; ///< long (1024 samples) frame transform context
|
||||||
|
MDCTContext mdct128; ///< short (128 samples) frame transform context
|
||||||
|
DSPContext dsp; |
||||||
|
DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
|
||||||
|
int16_t* samples; ///< saved preprocessed input
|
||||||
|
|
||||||
|
int samplerate_index; ///< MPEG-4 samplerate index
|
||||||
|
|
||||||
|
ChannelElement *cpe; ///< channel elements
|
||||||
|
FFPsyContext psy; |
||||||
|
struct FFPsyPreprocessContext* psypp; |
||||||
|
AACCoefficientsEncoder *coder; |
||||||
|
int cur_channel; |
||||||
|
int last_frame; |
||||||
|
float lambda; |
||||||
|
DECLARE_ALIGNED_16(int, qcoefs[96][2]); ///< quantized coefficients
|
||||||
|
DECLARE_ALIGNED_16(float, scoefs[1024]); ///< scaled coefficients
|
||||||
|
} AACEncContext; |
||||||
|
|
||||||
|
#endif /* AVCODEC_AACENC_H */ |
@ -0,0 +1,130 @@ |
|||||||
|
/*
|
||||||
|
* audio encoder psychoacoustic model |
||||||
|
* Copyright (C) 2008 Konstantin Shishkov |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "avcodec.h" |
||||||
|
#include "psymodel.h" |
||||||
|
#include "iirfilter.h" |
||||||
|
|
||||||
|
extern const FFPsyModel ff_aac_psy_model; |
||||||
|
|
||||||
|
/**
 * Set up a psychoacoustic model context.
 *
 * Allocates the per-channel band storage, takes private copies of the
 * scalefactor band tables, selects the model matching the codec id and
 * runs that model's own init callback when it has one.
 *
 * @param ctx       model context to fill in
 * @param avctx     codec context (channels and codec_id are read)
 * @param num_lens  number of entries in bands and num_bands
 * @param bands     scalefactor band tables, one pointer per frame length
 * @param num_bands number of scalefactor bands, one count per frame length
 *
 * @return zero on success, a negative value if an allocation fails, if no
 *         model exists for the codec, or if the model's init callback
 *         reports failure
 */
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                        int num_lens,
                        const uint8_t **bands, const int* num_bands)
{
    ctx->avctx     = avctx;
    ctx->model     = NULL; /* never leave a stale model pointer behind */
    ctx->num_lens  = num_lens;
    ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
    ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
    ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
    if (!ctx->psy_bands || !ctx->bands || !ctx->num_bands)
        goto fail;

    memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     * num_lens);
    memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);

    switch (avctx->codec_id) {
    case CODEC_ID_AAC:
        ctx->model = &ff_aac_psy_model;
        break;
    default:
        /* no psychoacoustic model available for this codec */
        break;
    }
    if (!ctx->model)
        goto fail;

    /* a model-level failure is reported as is; cleanup stays with ff_psy_end() */
    if (ctx->model->init)
        return ctx->model->init(ctx);
    return 0;

fail:
    av_freep(&ctx->psy_bands);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    return -1;
}
||||||
|
|
||||||
|
/**
 * Ask the active model which window sequence to use for a channel.
 *
 * Forwards all arguments unchanged to the model's window callback and
 * returns its result by value.
 */
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
                                      const int16_t *audio, const int16_t *la,
                                      int channel, int prev_type)
{
    const struct FFPsyModel *active = ctx->model;
    FFPsyWindowInfo suggestion;

    suggestion = active->window(ctx, audio, la, channel, prev_type);
    return suggestion;
}
||||||
|
|
||||||
|
/**
 * Run the active model's analysis for one channel.
 *
 * Forwards all arguments unchanged to the model's analyze callback.
 */
void ff_psy_set_band_info(FFPsyContext *ctx, int channel,
                          const float *coeffs, FFPsyWindowInfo *wi)
{
    const struct FFPsyModel *active = ctx->model;

    active->analyze(ctx, channel, coeffs, wi);
}
||||||
|
|
||||||
|
/**
 * Release everything owned by a psychoacoustic model context.
 *
 * Calls the model's end callback when a model is attached and provides
 * one, then frees the band tables and per-band storage. The context
 * structure itself is not freed.
 *
 * @param ctx model context
 */
av_cold void ff_psy_end(FFPsyContext *ctx)
{
    /* model is checked too, so cleanup does not crash when none is attached */
    if (ctx->model && ctx->model->end)
        ctx->model->end(ctx);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    av_freep(&ctx->psy_bands);
}
||||||
|
|
||||||
|
typedef struct FFPsyPreprocessContext{ |
||||||
|
AVCodecContext *avctx; |
||||||
|
float stereo_att; |
||||||
|
struct FFIIRFilterCoeffs *fcoeffs; |
||||||
|
struct FFIIRFilterState **fstate; |
||||||
|
}FFPsyPreprocessContext; |
||||||
|
|
||||||
|
#define FILT_ORDER 4 |
||||||
|
|
||||||
|
/**
 * Create the audio preprocessing context.
 *
 * Builds a Butterworth lowpass of order FILT_ORDER. The cutoff coefficient
 * comes from global_quality when CODEC_FLAG_QSCALE is set, otherwise from
 * bit_rate, sample_rate and channels. One filter state is allocated per
 * channel.
 *
 * If the filter or any of its states cannot be set up, the context is
 * still returned with fstate == NULL; ff_psy_preprocess() then copies the
 * samples unfiltered.
 *
 * @param avctx codec context
 * @return the new context, or NULL if the context itself cannot be allocated
 */
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx)
{
    FFPsyPreprocessContext *ctx;
    float cutoff_coeff;
    int i = 0;

    ctx = av_mallocz(sizeof(FFPsyPreprocessContext));
    if (!ctx)
        return NULL;
    ctx->avctx = avctx;

    if (avctx->flags & CODEC_FLAG_QSCALE)
        cutoff_coeff = 1.0f / av_clip(1 + avctx->global_quality / FF_QUALITY_SCALE, 1, 8);
    else
        cutoff_coeff = avctx->bit_rate / (4.0f * avctx->sample_rate * avctx->channels);

    ctx->fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS,
                                             FILT_ORDER, cutoff_coeff, 0.0, 0.0);
    if (!ctx->fcoeffs)
        return ctx; /* no filter available: run without filtering */

    ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
    if (!ctx->fstate)
        goto no_filter;
    for (i = 0; i < avctx->channels; i++) {
        ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
        if (!ctx->fstate[i])
            goto no_filter;
    }
    return ctx;

no_filter:
    /* undo the partial filter setup so the context is in the "no filter" state */
    if (ctx->fstate) {
        while (i-- > 0)
            ff_iir_filter_free_state(ctx->fstate[i]);
        av_freep(&ctx->fstate);
    }
    ff_iir_filter_free_coeffs(ctx->fcoeffs);
    ctx->fcoeffs = NULL;
    return ctx;
}
||||||
|
|
||||||
|
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, |
||||||
|
const int16_t *audio, int16_t *dest, |
||||||
|
int tag, int channels) |
||||||
|
{ |
||||||
|
int ch, i; |
||||||
|
if(ctx->fstate){ |
||||||
|
for(ch = 0; ch < channels; ch++){ |
||||||
|
ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size, |
||||||
|
audio + ch, ctx->avctx->channels, |
||||||
|
dest + ch, ctx->avctx->channels); |
||||||
|
} |
||||||
|
}else{ |
||||||
|
for(ch = 0; ch < channels; ch++){ |
||||||
|
for(i = 0; i < ctx->avctx->frame_size; i++) |
||||||
|
dest[i*ctx->avctx->channels + ch] = audio[i*ctx->avctx->channels + ch]; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
ff_iir_filter_free_coeffs(ctx->fcoeffs); |
||||||
|
if (ctx->fstate) |
||||||
|
for (i = 0; i < ctx->avctx->channels; i++) |
||||||
|
ff_iir_filter_free_state(ctx->fstate[i]); |
||||||
|
av_freep(&ctx->fstate); |
||||||
|
} |
||||||
|
|
@ -0,0 +1,158 @@ |
|||||||
|
/*
|
||||||
|
* audio encoder psychoacoustic model |
||||||
|
* Copyright (C) 2008 Konstantin Shishkov |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_PSYMODEL_H |
||||||
|
#define AVCODEC_PSYMODEL_H |
||||||
|
|
||||||
|
#include "avcodec.h" |
||||||
|
|
||||||
|
/** maximum possible number of bands */ |
||||||
|
#define PSY_MAX_BANDS 128 |
||||||
|
|
||||||
|
/**
 * Psychoacoustic data kept for a single band.
 */
typedef struct FFPsyBand {
    int   bits;              /**< NOTE(review): presumably a bit count for the band - confirm with the model */
    float energy;            /**< band energy */
    float threshold;         /**< band threshold */
    float distortion;        /**< band distortion */
    float perceptual_weight; /**< perceptual weight of the band */
} FFPsyBand;
||||||
|
|
||||||
|
/**
 * Windowing decisions for one frame.
 */
typedef struct FFPsyWindowInfo {
    /** window type (short/long/transitional, etc.) for the current, previous and next frame */
    int window_type[3];
    /** window shape (sine/KBD/whatever) */
    int window_shape;
    /** how many windows the frame contains */
    int num_windows;
    /** window grouping (used by e.g. AAC) */
    int grouping[8];
    /** sequence of window sizes within one frame (used by e.g. WMA) */
    int *window_sizes;
} FFPsyWindowInfo;
||||||
|
|
||||||
|
/**
|
||||||
|
* context used by psychoacoustic model |
||||||
|
*/ |
||||||
|
typedef struct FFPsyContext{ |
||||||
|
AVCodecContext *avctx; ///< encoder context
|
||||||
|
const struct FFPsyModel *model; ///< encoder-specific model functions
|
||||||
|
|
||||||
|
FFPsyBand *psy_bands; ///< frame bands information
|
||||||
|
|
||||||
|
uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
|
||||||
|
int *num_bands; ///< number of scalefactor bands for possible frame sizes
|
||||||
|
int num_lens; ///< number of scalefactor band sets
|
||||||
|
|
||||||
|
void* model_priv_data; ///< psychoacoustic model implementation private data
|
||||||
|
}FFPsyContext; |
||||||
|
|
||||||
|
/**
|
||||||
|
* codec-specific psychoacoustic model implementation |
||||||
|
*/ |
||||||
|
typedef struct FFPsyModel { |
||||||
|
const char *name; |
||||||
|
int (*init) (FFPsyContext *apc); |
||||||
|
FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); |
||||||
|
void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi); |
||||||
|
void (*end) (FFPsyContext *apc); |
||||||
|
}FFPsyModel; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize psychoacoustic model. |
||||||
|
* |
||||||
|
* @param ctx model context |
||||||
|
* @param avctx codec context |
||||||
|
* @param num_lens number of possible frame lengths |
||||||
|
* @param bands scalefactor band lengths for all frame lengths |
||||||
|
* @param num_bands number of scalefactor bands for all frame lengths |
||||||
|
* |
||||||
|
* @return zero if successful, a negative value if not |
||||||
|
*/ |
||||||
|
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, |
||||||
|
int num_lens, |
||||||
|
const uint8_t **bands, const int* num_bands); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Suggest window sequence for channel. |
||||||
|
* |
||||||
|
* @param ctx model context |
||||||
|
* @param audio samples for the current frame |
||||||
|
* @param la lookahead samples (NULL when unavailable) |
||||||
|
* @param channel index of the channel element to analyze |
||||||
|
* @param prev_type previous window type |
||||||
|
* |
||||||
|
* @return suggested window information in a structure |
||||||
|
*/ |
||||||
|
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx, |
||||||
|
const int16_t *audio, const int16_t *la, |
||||||
|
int channel, int prev_type); |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform psychoacoustic analysis and set band info (threshold, energy). |
||||||
|
* |
||||||
|
* @param ctx model context |
||||||
|
* @param channel audio channel number |
||||||
|
* @param coeffs pointer to the transformed coefficients |
||||||
|
* @param wi window information |
||||||
|
*/ |
||||||
|
void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs, |
||||||
|
FFPsyWindowInfo *wi); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup model context at the end. |
||||||
|
* |
||||||
|
* @param ctx model context |
||||||
|
*/ |
||||||
|
av_cold void ff_psy_end(FFPsyContext *ctx); |
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* Audio preprocessing stuff. * |
||||||
|
* This should be moved into some audio filter eventually. * |
||||||
|
**************************************************************************/ |
||||||
|
struct FFPsyPreprocessContext; |
||||||
|
|
||||||
|
/**
|
||||||
|
* psychoacoustic model audio preprocessing initialization |
||||||
|
*/ |
||||||
|
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Preprocess several channels of an audio frame in order to compress them better. |
||||||
|
* |
||||||
|
* @param ctx preprocessing context |
||||||
|
* @param audio samples to preprocess |
||||||
|
* @param dest place to put filtered samples |
||||||
|
* @param tag channel number |
||||||
|
* @param channels number of channels to preprocess (some additional work may be done on a stereo pair) |
||||||
|
*/ |
||||||
|
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, |
||||||
|
const int16_t *audio, int16_t *dest, |
||||||
|
int tag, int channels); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup audio preprocessing module. |
||||||
|
*/ |
||||||
|
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); |
||||||
|
|
||||||
|
#endif /* AVCODEC_PSYMODEL_H */ |
Loading…
Reference in new issue