@ -251,6 +251,7 @@ typedef struct WMAVoiceContext {
int frame_cntr ; ///< current frame index [0 - 0xFFFE]; is
///< only used for comfort noise in #pRNG()
int nb_superframes ; ///< number of superframes in current packet
float gain_pred_err [ 6 ] ; ///< cache for gain prediction
float excitation_history [ MAX_SIGNAL_HISTORY ] ;
///< cache of the signal of previous
@ -875,7 +876,6 @@ static void dequant_lsps(double *lsps, int num,
/**
* @ name LSP dequantization routines
* LSP dequantization routines , for 10 / 16L SPs and independent / residual coding .
* @ note we assume enough bits are available , caller should check .
* lsp10i ( ) consumes 24 bits ; lsp10r ( ) consumes an additional 24 bits ;
* lsp16i ( ) consumes 34 bits ; lsp16r ( ) consumes an additional 26 bits .
* @ {
@ -1419,7 +1419,6 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
/**
* Parse data in a single block .
* @ note we assume enough bits are available , caller should check .
*
* @ param s WMA Voice decoding context private data
* @ param gb bit I / O context
@ -1463,7 +1462,6 @@ static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
/**
* Synthesize output samples for a single frame .
* @ note we assume enough bits are available , caller should check .
*
* @ param ctx WMA Voice decoder context
* @ param gb bit I / O context ( s - > gb or one for cross - packet superframes )
@ -1681,83 +1679,6 @@ static void stabilize_lsps(double *lsps, int num)
}
}
/**
* Test if there ' s enough bits to read 1 superframe .
*
* @ param orig_gb bit I / O context used for reading . This function
* does not modify the state of the bitreader ; it
* only uses it to copy the current stream position
* @ param s WMA Voice decoding context private data
* @ return < 0 on error , 1 on not enough bits or 0 if OK .
*/
static int check_bits_for_superframe ( GetBitContext * orig_gb ,
WMAVoiceContext * s )
{
GetBitContext s_gb , * gb = & s_gb ;
int n , need_bits , bd_idx ;
const struct frame_type_desc * frame_desc ;
/* initialize a copy */
init_get_bits ( gb , orig_gb - > buffer , orig_gb - > size_in_bits ) ;
skip_bits_long ( gb , get_bits_count ( orig_gb ) ) ;
av_assert1 ( get_bits_left ( gb ) = = get_bits_left ( orig_gb ) ) ;
/* superframe header */
if ( get_bits_left ( gb ) < 14 )
return 1 ;
if ( ! get_bits1 ( gb ) )
return AVERROR ( ENOSYS ) ; // WMAPro-in-WMAVoice superframe
if ( get_bits1 ( gb ) ) skip_bits ( gb , 12 ) ; // number of samples in superframe
if ( s - > has_residual_lsps ) { // residual LSPs (for all frames)
if ( get_bits_left ( gb ) < s - > sframe_lsp_bitsize )
return 1 ;
skip_bits_long ( gb , s - > sframe_lsp_bitsize ) ;
}
/* frames */
for ( n = 0 ; n < MAX_FRAMES ; n + + ) {
int aw_idx_is_ext = 0 ;
if ( ! s - > has_residual_lsps ) { // independent LSPs (per-frame)
if ( get_bits_left ( gb ) < s - > frame_lsp_bitsize ) return 1 ;
skip_bits_long ( gb , s - > frame_lsp_bitsize ) ;
}
bd_idx = s - > vbm_tree [ get_vlc2 ( gb , frame_type_vlc . table , 6 , 3 ) ] ;
if ( bd_idx < 0 )
return AVERROR_INVALIDDATA ; // invalid frame type VLC code
frame_desc = & frame_descs [ bd_idx ] ;
if ( frame_desc - > acb_type = = ACB_TYPE_ASYMMETRIC ) {
if ( get_bits_left ( gb ) < s - > pitch_nbits )
return 1 ;
skip_bits_long ( gb , s - > pitch_nbits ) ;
}
if ( frame_desc - > fcb_type = = FCB_TYPE_SILENCE ) {
skip_bits ( gb , 8 ) ;
} else if ( frame_desc - > fcb_type = = FCB_TYPE_AW_PULSES ) {
int tmp = get_bits ( gb , 6 ) ;
if ( tmp > = 0x36 ) {
skip_bits ( gb , 2 ) ;
aw_idx_is_ext = 1 ;
}
}
/* blocks */
if ( frame_desc - > acb_type = = ACB_TYPE_HAMMING ) {
need_bits = s - > block_pitch_nbits +
( frame_desc - > n_blocks - 1 ) * s - > block_delta_pitch_nbits ;
} else if ( frame_desc - > fcb_type = = FCB_TYPE_AW_PULSES ) {
need_bits = 2 * ! aw_idx_is_ext ;
} else
need_bits = 0 ;
need_bits + = frame_desc - > frame_size ;
if ( get_bits_left ( gb ) < need_bits )
return 1 ;
skip_bits_long ( gb , need_bits ) ;
}
return 0 ;
}
/**
* Synthesize output samples for a single superframe . If we have any data
* cached in s - > sframe_cache , that will be used instead of whatever is loaded
@ -1780,7 +1701,7 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
{
WMAVoiceContext * s = ctx - > priv_data ;
GetBitContext * gb = & s - > gb , s_gb ;
int n , res , n_samples = 480 ;
int n , res , n_samples = MAX_SFRAMESIZE ;
double lsps [ MAX_FRAMES ] [ MAX_LSPS ] ;
const double * mean_lsf = s - > lsps = = 16 ?
wmavoice_mean_lsf16 [ s - > lsp_def_mode ] : wmavoice_mean_lsf10 [ s - > lsp_def_mode ] ;
@ -1799,12 +1720,6 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
s - > sframe_cache_size = 0 ;
}
if ( ( res = check_bits_for_superframe ( gb , s ) ) = = 1 ) {
* got_frame_ptr = 0 ;
return 1 ;
} else if ( res < 0 )
return res ;
/* First bit is speech/music bit, it differentiates between WMAVoice
* speech samples ( the actual codec ) and WMAVoice music samples , which
* are really WMAPro - in - WMAVoice - superframes . I ' ve never seen those in
@ -1816,13 +1731,14 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
/* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
if ( get_bits1 ( gb ) ) {
if ( ( n_samples = get_bits ( gb , 12 ) ) > 480 ) {
if ( ( n_samples = get_bits ( gb , 12 ) ) > MAX_SFRAMESIZE ) {
av_log ( ctx , AV_LOG_ERROR ,
" Superframe encodes >480 samples (%d), not allowed \n " ,
n_samples ) ;
" Superframe encodes > %d samples (%d), not allowed \n " ,
MAX_SFRAMESIZE , n_samples ) ;
return AVERROR_INVALIDDATA ;
}
}
/* Parse LSPs, if global for the superframe (can also be per-frame). */
if ( s - > has_residual_lsps ) {
double prev_lsps [ MAX_LSPS ] , a1 [ MAX_LSPS * 2 ] , a2 [ MAX_LSPS * 2 ] ;
@ -1845,7 +1761,7 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
}
/* get output buffer */
frame - > nb_samples = 480 ;
frame - > nb_samples = MAX_SFRAMESIZE ;
if ( ( res = ff_get_buffer ( ctx , frame , 0 ) ) < 0 )
return res ;
frame - > nb_samples = n_samples ;
@ -1905,26 +1821,23 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
* decoder ) .
*
* @ param s WMA Voice decoding context private data
* @ return 1 if not enough bits were available , or 0 on success .
* @ return < 0 on error , nb_superframes on success .
*/
static int parse_packet_header ( WMAVoiceContext * s )
{
GetBitContext * gb = & s - > gb ;
unsigned int res ;
unsigned int res , n_superframes = 0 ;
if ( get_bits_left ( gb ) < 11 )
return 1 ;
skip_bits ( gb , 4 ) ; // packet sequence number
s - > has_residual_lsps = get_bits1 ( gb ) ;
do {
res = get_bits ( gb , 6 ) ; // number of superframes per packet
// (minus first one if there is spillover)
if ( get_bits_left ( gb ) < 6 * ( res = = 0x3F ) + s - > spillover_bitsize )
return 1 ;
n_superframes + = res ;
} while ( res = = 0x3F ) ;
s - > spillover_nbits = get_bits ( gb , s - > spillover_bitsize ) ;
return 0 ;
return get_bits_left ( gb ) > = 0 ? n_superframes : AVERROR_INVALIDDATA ;
}
/**
@ -1984,23 +1897,24 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
* in a single " muxer " packet , so we artificially emulate that by
* capping the packet size at ctx - > block_align . */
for ( size = avpkt - > size ; size > ctx - > block_align ; size - = ctx - > block_align ) ;
if ( ! size ) {
* got_frame_ptr = 0 ;
return 0 ;
}
init_get_bits ( & s - > gb , avpkt - > data , size < < 3 ) ;
/* size == ctx->block_align is used to indicate whether we are dealing with
* a new packet or a packet of which we already read the packet header
* previously . */
if ( size = = ctx - > block_align ) { // new packet header
if ( ( res = parse_packet_header ( s ) ) < 0 )
return res ;
if ( ! ( size % ctx - > block_align ) ) { // new packet header
if ( ! size ) {
s - > spillover_nbits = 0 ;
s - > nb_superframes = 0 ;
} else {
if ( ( res = parse_packet_header ( s ) ) < 0 )
return res ;
s - > nb_superframes = res ;
}
/* If the packet header specifies a s->spillover_nbits, then we want
* to push out all data of the previous packet ( + spillover ) before
* continuing to parse new superframes in the current packet . */
if ( s - > spillover_nbits > 0 ) {
if ( s - > sframe_cache_size > 0 ) {
int cnt = get_bits_count ( gb ) ;
copy_bits ( & s - > pb , avpkt - > data , size , gb , s - > spillover_nbits ) ;
@ -2021,9 +1935,9 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
} else
skip_bits_long ( gb , s - > spillover_nbits - cnt +
get_bits_count ( gb ) ) ; // resync
} else
} else if ( s - > spillover_nbits ) {
skip_bits_long ( gb , s - > spillover_nbits ) ; // resync
}
}
} else if ( s - > skip_bits_next )
skip_bits ( gb , s - > skip_bits_next ) ;
@ -2031,6 +1945,10 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
s - > sframe_cache_size = 0 ;
s - > skip_bits_next = 0 ;
pos = get_bits_left ( gb ) ;
if ( s - > nb_superframes - - = = 0 ) {
* got_frame_ptr = 0 ;
return size ;
} else if ( s - > nb_superframes > 0 ) {
if ( ( res = synth_superframe ( ctx , data , got_frame_ptr ) ) < 0 ) {
return res ;
} else if ( * got_frame_ptr ) {
@ -2044,13 +1962,9 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
return AVERROR_INVALIDDATA ;
}
return res ;
}
} else if ( ( s - > sframe_cache_size = pos ) > 0 ) {
/* rewind bit reader to start of last (incomplete) superframe... */
init_get_bits ( gb , avpkt - > data , size < < 3 ) ;
skip_bits_long ( gb , ( size < < 3 ) - pos ) ;
av_assert1 ( get_bits_left ( gb ) = = pos ) ;
/* ...and cache it for spillover in next packet */
/* ... cache it for spillover in next packet */
init_put_bits ( & s - > pb , s - > sframe_cache , SFRAME_CACHE_MAXSIZE ) ;
copy_bits ( & s - > pb , avpkt - > data , size , gb , s - > sframe_cache_size ) ;
// FIXME bad - just copy bytes as whole and add use the
@ -2084,6 +1998,6 @@ AVCodec ff_wmavoice_decoder = {
. init_static_data = wmavoice_init_static_data ,
. close = wmavoice_decode_end ,
. decode = wmavoice_decode_packet ,
. capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 ,
. capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY ,
. flush = wmavoice_flush ,
} ;