mirror of https://github.com/FFmpeg/FFmpeg.git
Originally committed as revision 19375 to svn://svn.ffmpeg.org/ffmpeg/trunkrelease/0.6
parent
5e039e1b4c
commit
78e65cd772
10 changed files with 1992 additions and 115 deletions
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@ |
||||
/*
|
||||
* AAC encoder |
||||
* Copyright (C) 2008 Konstantin Shishkov |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_AACENC_H |
||||
#define AVCODEC_AACENC_H |
||||
|
||||
#include "avcodec.h" |
||||
#include "put_bits.h" |
||||
#include "dsputil.h" |
||||
|
||||
#include "aac.h" |
||||
|
||||
#include "psymodel.h" |
||||
|
||||
struct AACEncContext; |
||||
|
||||
typedef struct AACCoefficientsEncoder{ |
||||
void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, |
||||
SingleChannelElement *sce, const float lambda); |
||||
void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, |
||||
int win, int group_len, const float lambda); |
||||
void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size, |
||||
int scale_idx, int cb, const float lambda); |
||||
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda); |
||||
}AACCoefficientsEncoder; |
||||
|
||||
extern AACCoefficientsEncoder ff_aac_coders[]; |
||||
|
||||
/**
|
||||
* AAC encoder context |
||||
*/ |
||||
typedef struct AACEncContext { |
||||
PutBitContext pb; |
||||
MDCTContext mdct1024; ///< long (1024 samples) frame transform context
|
||||
MDCTContext mdct128; ///< short (128 samples) frame transform context
|
||||
DSPContext dsp; |
||||
DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
|
||||
int16_t* samples; ///< saved preprocessed input
|
||||
|
||||
int samplerate_index; ///< MPEG-4 samplerate index
|
||||
|
||||
ChannelElement *cpe; ///< channel elements
|
||||
FFPsyContext psy; |
||||
struct FFPsyPreprocessContext* psypp; |
||||
AACCoefficientsEncoder *coder; |
||||
int cur_channel; |
||||
int last_frame; |
||||
float lambda; |
||||
DECLARE_ALIGNED_16(int, qcoefs[96][2]); ///< quantized coefficients
|
||||
DECLARE_ALIGNED_16(float, scoefs[1024]); ///< scaled coefficients
|
||||
} AACEncContext; |
||||
|
||||
#endif /* AVCODEC_AACENC_H */ |
@ -0,0 +1,130 @@ |
||||
/*
|
||||
* audio encoder psychoacoustic model |
||||
* Copyright (C) 2008 Konstantin Shishkov |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <errno.h>

#include "avcodec.h"
#include "psymodel.h"
#include "iirfilter.h"
||||
|
||||
extern const FFPsyModel ff_aac_psy_model; |
||||
|
||||
/**
 * Initialize a psychoacoustic model context.
 *
 * Allocates the per-channel band storage, copies the caller's scalefactor
 * band tables into the context, selects the model matching the codec ID and
 * runs the model's own init callback when it has one.
 *
 * @param ctx       model context to fill in
 * @param avctx     codec context
 * @param num_lens  number of entries in bands and num_bands (must be positive)
 * @param bands     scalefactor band tables, one pointer per frame length
 * @param num_bands number of scalefactor bands, one entry per frame length
 *
 * @return zero if successful, a negative value if not; when allocation fails
 *         or the codec has no model, everything allocated here is released
 *         again and ctx->model is left NULL
 */
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                        int num_lens,
                        const uint8_t **bands, const int* num_bands)
{
    int ret;

    ctx->avctx = avctx;
    ctx->model = NULL; /* never leave a stale model pointer behind on failure */

    if (num_lens <= 0)
        return AVERROR(EINVAL);
    ctx->num_lens = num_lens;

    ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
    ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
    ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
    if (!ctx->psy_bands || !ctx->bands || !ctx->num_bands) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     * num_lens);
    memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);

    switch (ctx->avctx->codec_id) {
    case CODEC_ID_AAC:
        ctx->model = &ff_aac_psy_model;
        break;
    default:
        /* no psychoacoustic model is available for this codec */
        ret = AVERROR(EINVAL);
        goto fail;
    }

    if (ctx->model->init)
        return ctx->model->init(ctx);
    return 0;

fail:
    av_freep(&ctx->psy_bands);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    return ret;
}
||||
|
||||
/**
 * Ask the selected model for a window sequence suggestion.
 *
 * Forwards all arguments unchanged to the model's window callback and
 * returns its result.
 */
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
                                      const int16_t *audio, const int16_t *la,
                                      int channel, int prev_type)
{
    const FFPsyModel *model = ctx->model;
    FFPsyWindowInfo suggestion;

    suggestion = model->window(ctx, audio, la, channel, prev_type);
    return suggestion;
}
||||
|
||||
/**
 * Run the selected model's analysis on one channel.
 *
 * Forwards all arguments unchanged to the model's analyze callback.
 */
void ff_psy_set_band_info(FFPsyContext *ctx, int channel,
                          const float *coeffs, FFPsyWindowInfo *wi)
{
    const FFPsyModel *model = ctx->model;

    model->analyze(ctx, channel, coeffs, wi);
}
||||
|
||||
/**
 * Release everything owned by a psychoacoustic model context.
 *
 * Calls the model's end callback when a model is set and provides one, then
 * frees the band tables and the per-channel band storage. The pointers are
 * reset to NULL by av_freep().
 *
 * @param ctx model context
 */
av_cold void ff_psy_end(FFPsyContext *ctx)
{
    /* the model may be unset when no model was ever selected for ctx */
    if (ctx->model && ctx->model->end)
        ctx->model->end(ctx);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    av_freep(&ctx->psy_bands);
}
||||
|
||||
typedef struct FFPsyPreprocessContext{ |
||||
AVCodecContext *avctx; |
||||
float stereo_att; |
||||
struct FFIIRFilterCoeffs *fcoeffs; |
||||
struct FFIIRFilterState **fstate; |
||||
}FFPsyPreprocessContext; |
||||
|
||||
#define FILT_ORDER 4 |
||||
|
||||
/**
 * Create an audio preprocessing context.
 *
 * Sets up a Butterworth lowpass filter of order FILT_ORDER. The cutoff
 * coefficient is 1 / clip(1 + global_quality / FF_QUALITY_SCALE, 1, 8) when
 * CODEC_FLAG_QSCALE is set, and bit_rate / (4 * sample_rate * channels)
 * otherwise. If the filter coefficients or any filter state cannot be
 * created, the context is still returned but with filtering disabled, so
 * that ff_psy_preprocess() only copies samples.
 *
 * @param avctx codec context
 * @return new preprocessing context, or NULL if it could not be allocated
 */
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx)
{
    FFPsyPreprocessContext *ctx;
    int i = 0;
    float cutoff_coeff;

    ctx = av_mallocz(sizeof(FFPsyPreprocessContext));
    if (!ctx)
        return NULL;
    ctx->avctx = avctx;

    if (avctx->flags & CODEC_FLAG_QSCALE)
        cutoff_coeff = 1.0f / av_clip(1 + avctx->global_quality / FF_QUALITY_SCALE, 1, 8);
    else
        cutoff_coeff = avctx->bit_rate / (4.0f * avctx->sample_rate * avctx->channels);

    ctx->fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS,
                                             FILT_ORDER, cutoff_coeff, 0.0, 0.0);
    if (!ctx->fcoeffs)
        return ctx; /* no filter available: plain copy mode */

    ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
    if (!ctx->fstate)
        goto no_filter;
    for (i = 0; i < avctx->channels; i++) {
        /* NOTE(review): assumes ff_iir_filter_init_state() returns NULL on failure */
        ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
        if (!ctx->fstate[i])
            goto no_filter;
    }
    return ctx;

no_filter:
    /* Drop the partially built filter; a NULL fstate selects copy mode. */
    if (ctx->fstate) {
        while (i-- > 0)
            ff_iir_filter_free_state(ctx->fstate[i]);
        av_freep(&ctx->fstate);
    }
    ff_iir_filter_free_coeffs(ctx->fcoeffs);
    ctx->fcoeffs = NULL;
    return ctx;
}
||||
|
||||
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, |
||||
const int16_t *audio, int16_t *dest, |
||||
int tag, int channels) |
||||
{ |
||||
int ch, i; |
||||
if(ctx->fstate){ |
||||
for(ch = 0; ch < channels; ch++){ |
||||
ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size, |
||||
audio + ch, ctx->avctx->channels, |
||||
dest + ch, ctx->avctx->channels); |
||||
} |
||||
}else{ |
||||
for(ch = 0; ch < channels; ch++){ |
||||
for(i = 0; i < ctx->avctx->frame_size; i++) |
||||
dest[i*ctx->avctx->channels + ch] = audio[i*ctx->avctx->channels + ch]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx) |
||||
{ |
||||
int i; |
||||
ff_iir_filter_free_coeffs(ctx->fcoeffs); |
||||
if (ctx->fstate) |
||||
for (i = 0; i < ctx->avctx->channels; i++) |
||||
ff_iir_filter_free_state(ctx->fstate[i]); |
||||
av_freep(&ctx->fstate); |
||||
} |
||||
|
@ -0,0 +1,158 @@ |
||||
/*
|
||||
* audio encoder psychoacoustic model |
||||
* Copyright (C) 2008 Konstantin Shishkov |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_PSYMODEL_H |
||||
#define AVCODEC_PSYMODEL_H |
||||
|
||||
#include "avcodec.h" |
||||
|
||||
/** maximum possible number of bands */
#define PSY_MAX_BANDS 128

/**
 * single band psychoacoustic information
 */
typedef struct FFPsyBand {
    int   bits;
    float energy;
    float threshold;
    float distortion;
    float perceptual_weight;
} FFPsyBand;
||||
|
||||
/**
 * windowing related information
 */
typedef struct FFPsyWindowInfo {
    int  window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
    int  window_shape;   ///< window shape (sine/KBD/whatever)
    int  num_windows;    ///< number of windows in a frame
    int  grouping[8];    ///< window grouping (for e.g. AAC)
    int *window_sizes;   ///< sequence of window sizes inside one frame (for e.g. WMA)
} FFPsyWindowInfo;
||||
|
||||
/**
|
||||
* context used by psychoacoustic model |
||||
*/ |
||||
typedef struct FFPsyContext{ |
||||
AVCodecContext *avctx; ///< encoder context
|
||||
const struct FFPsyModel *model; ///< encoder-specific model functions
|
||||
|
||||
FFPsyBand *psy_bands; ///< frame bands information
|
||||
|
||||
uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
|
||||
int *num_bands; ///< number of scalefactor bands for possible frame sizes
|
||||
int num_lens; ///< number of scalefactor band sets
|
||||
|
||||
void* model_priv_data; ///< psychoacoustic model implementation private data
|
||||
}FFPsyContext; |
||||
|
||||
/**
|
||||
* codec-specific psychoacoustic model implementation |
||||
*/ |
||||
typedef struct FFPsyModel { |
||||
const char *name; |
||||
int (*init) (FFPsyContext *apc); |
||||
FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); |
||||
void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi); |
||||
void (*end) (FFPsyContext *apc); |
||||
}FFPsyModel; |
||||
|
||||
/**
|
||||
* Initialize psychoacoustic model. |
||||
* |
||||
* @param ctx model context |
||||
* @param avctx codec context |
||||
* @param num_lens number of possible frame lengths |
||||
* @param bands scalefactor band lengths for all frame lengths |
||||
* @param num_bands number of scalefactor bands for all frame lengths |
||||
* |
||||
* @return zero if successful, a negative value if not |
||||
*/ |
||||
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, |
||||
int num_lens, |
||||
const uint8_t **bands, const int* num_bands); |
||||
|
||||
/**
|
||||
* Suggest window sequence for channel. |
||||
* |
||||
* @param ctx model context |
||||
* @param audio samples for the current frame |
||||
* @param la lookahead samples (NULL when unavailable) |
||||
* @param channel number of channel element to analyze |
||||
* @param prev_type previous window type |
||||
* |
||||
* @return suggested window information in a structure |
||||
*/ |
||||
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx, |
||||
const int16_t *audio, const int16_t *la, |
||||
int channel, int prev_type); |
||||
|
||||
|
||||
/**
|
||||
* Perform psychoacoustic analysis and set band info (threshold, energy). |
||||
* |
||||
* @param ctx model context |
||||
* @param channel audio channel number |
||||
* @param coeffs pointer to the transformed coefficients |
||||
* @param wi window information |
||||
*/ |
||||
void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs, |
||||
FFPsyWindowInfo *wi); |
||||
|
||||
/**
|
||||
* Cleanup model context at the end. |
||||
* |
||||
* @param ctx model context |
||||
*/ |
||||
av_cold void ff_psy_end(FFPsyContext *ctx); |
||||
|
||||
|
||||
/**************************************************************************
|
||||
* Audio preprocessing stuff. * |
||||
* This should be moved into some audio filter eventually. * |
||||
**************************************************************************/ |
||||
struct FFPsyPreprocessContext; |
||||
|
||||
/**
|
||||
* psychoacoustic model audio preprocessing initialization |
||||
*/ |
||||
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx); |
||||
|
||||
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to preprocess
 * @param dest     place to put filtered samples
 * @param tag      channel number
 * @param channels number of channels to preprocess (some additional work may be done on a stereo pair)
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
                       const int16_t *audio, int16_t *dest,
                       int tag, int channels);
||||
|
||||
/**
|
||||
* Cleanup audio preprocessing module. |
||||
*/ |
||||
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); |
||||
|
||||
#endif /* AVCODEC_PSYMODEL_H */ |
Loading…
Reference in new issue