lavc: use a separate field for exporting audio encoder padding

Currently, the amount of padding inserted at the beginning by some audio
encoders is exported through AVCodecContext.delay. However:
- the term 'delay' is heavily overloaded and can have multiple different
  meanings even in the case of audio encoding.
- this field has entirely different meanings, depending on whether the
  codec context is used for encoding or decoding (and has yet another
  different meaning for video), preventing generic handling of the codec
  context.

Therefore, add a new field -- AVCodecContext.initial_padding. It could
conceivably be used for decoding as well at a later point.
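
For illustration, a minimal sketch (not part of this commit) of how a caller
would read the new field after opening an audio encoder; the choice of MP2,
the stream parameters and the helper name are placeholders:

#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>

/* Open an audio encoder and report the priming samples it will insert.
 * After this commit, avctx->initial_padding carries that count; the old
 * avctx->delay is only kept in sync for compatibility. */
static int open_audio_encoder(AVCodecContext **penc)
{
    AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
    AVCodecContext *enc;

    if (!codec || !(enc = avcodec_alloc_context3(codec)))
        return -1;

    enc->sample_rate    = 44100;
    enc->channel_layout = AV_CH_LAYOUT_STEREO;
    enc->channels       = 2;
    enc->sample_fmt     = codec->sample_fmts[0];
    enc->bit_rate       = 128000;

    if (avcodec_open2(enc, codec, NULL) < 0)
        return -1;

    /* e.g. a muxer can store this as the number of leading samples a
     * player should discard (cf. the Opus pre-skip field). */
    av_log(enc, AV_LOG_INFO, "priming samples: %d\n", enc->initial_padding);

    *penc = enc;
    return 0;
}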
Anton Khirnov 10 years ago
parent c80a816142
commit 2df0c32ea1
22 changed files (number of lines changed per file):

 doc/APIchanges                 |  4
 libavcodec/aacenc.c            |  2
 libavcodec/ac3enc.c            |  2
 libavcodec/audio_frame_queue.c |  4
 libavcodec/avcodec.h           | 28
 libavcodec/g722enc.c           |  4
 libavcodec/libfaac.c           |  2
 libavcodec/libfdk-aacenc.c     |  2
 libavcodec/libmp3lame.c        |  2
 libavcodec/libopencore-amr.c   |  4
 libavcodec/libopusenc.c        |  4
 libavcodec/libspeexenc.c       |  2
 libavcodec/libtwolame.c        |  4
 libavcodec/libvo-aacenc.c      |  2
 libavcodec/libvo-amrwbenc.c    |  4
 libavcodec/libvorbis.c         |  4
 libavcodec/mpegaudioenc.c      |  4
 libavcodec/nellymoserenc.c     |  2
 libavcodec/ra144enc.c          |  2
 libavcodec/utils.c             |  9
 libavcodec/version.h           |  7
 libavcodec/wmaenc.c            |  5

doc/APIchanges
@@ -13,6 +13,10 @@ libavutil: 2014-08-09
 
 API changes, most recent first:
 
+2014-10-13 - xxxxxxx - lavc 56.3.0 - avcodec.h
+  Add AVCodecContext.initial_padding. Deprecate the use of AVCodecContext.delay
+  for audio encoding.
+
 2014-09-xx - xxxxxxx - lavu 54.04.0 - pixdesc.h
   Add API to return the name of frame and context color properties.

libavcodec/aacenc.c
@@ -777,7 +777,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     for (i = 0; i < 428; i++)
         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
-    avctx->delay = 1024;
+    avctx->initial_padding = 1024;
     ff_af_queue_init(avctx, &s->afq);
     return 0;

libavcodec/ac3enc.c
@@ -2436,7 +2436,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         return ret;
     avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
-    avctx->delay = AC3_BLOCK_SIZE;
+    avctx->initial_padding = AC3_BLOCK_SIZE;
     s->bitstream_mode = avctx->audio_service_type;
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)

libavcodec/audio_frame_queue.c
@@ -29,8 +29,8 @@ av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
 {
     afq->avctx = avctx;
     afq->next_pts = AV_NOPTS_VALUE;
-    afq->remaining_delay = avctx->delay;
-    afq->remaining_samples = avctx->delay;
+    afq->remaining_delay = avctx->initial_padding;
+    afq->remaining_samples = avctx->initial_padding;
     afq->frame_queue = NULL;
 }

libavcodec/avcodec.h
@@ -1191,16 +1191,7 @@ typedef struct AVCodecContext {
      * encoded input.
      *
      * Audio:
-     *   For encoding, this is the number of "priming" samples added by the
-     *   encoder to the beginning of the stream. The decoded output will be
-     *   delayed by this many samples relative to the input to the encoder (or
-     *   more, if the decoder adds its own padding).
-     *   The timestamps on the output packets are adjusted by the encoder so
-     *   that they always refer to the first sample of the data actually
-     *   contained in the packet, including any added padding.
-     *   E.g. if the timebase is 1/samplerate and the timestamp of the first
-     *   input sample is 0, the timestamp of the first output packet will be
-     *   -delay.
+     *   For encoding, this field is unused (see initial_padding).
      *
      *   For decoding, this is the number of samples the decoder needs to
      *   output before the decoder's output is valid. When seeking, you should
@@ -2780,6 +2771,23 @@ typedef struct AVCodecContext {
      * use AVOptions to set this field.
      */
     int side_data_only_packets;
 
+    /**
+     * Audio only. The number of "priming" samples (padding) inserted by the
+     * encoder at the beginning of the audio. I.e. this number of leading
+     * decoded samples must be discarded by the caller to get the original audio
+     * without leading padding.
+     *
+     * - decoding: unused
+     * - encoding: Set by libavcodec. The timestamps on the output packets are
+     *             adjusted by the encoder so that they always refer to the
+     *             first sample of the data actually contained in the packet,
+     *             including any added padding. E.g. if the timebase is
+     *             1/samplerate and the timestamp of the first input sample is
+     *             0, the timestamp of the first output packet will be
+     *             -initial_padding.
+     */
+    int initial_padding;
 } AVCodecContext;
 
 /**
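
As a side note, a sketch of what "discarding the leading padding" in the new
documentation means in practice (illustrative only, not from this commit;
planar float samples and an explicit channel count are assumed for brevity):

#include <string.h>
#include <libavutil/common.h>
#include <libavutil/frame.h>

/* Drop up to '*padding_left' leading samples from a decoded planar-float
 * frame. Start '*padding_left' at the encoder's initial_padding and call
 * this on each decoded frame until it reaches zero. */
static void discard_priming(AVFrame *frame, int nb_channels, int *padding_left)
{
    int skip = FFMIN(*padding_left, frame->nb_samples);
    int ch;

    if (skip <= 0)
        return;

    for (ch = 0; ch < nb_channels; ch++) {
        float *samples = (float *)frame->extended_data[ch];
        memmove(samples, samples + skip,
                (frame->nb_samples - skip) * sizeof(*samples));
    }
    frame->nb_samples -= skip;
    *padding_left     -= skip;
}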

libavcodec/g722enc.c
@@ -106,7 +106,7 @@ static av_cold int g722_encode_init(AVCodecContext * avctx)
            a common packet size for VoIP applications */
         avctx->frame_size = 320;
     }
-    avctx->delay = 22;
+    avctx->initial_padding = 22;
     if (avctx->trellis) {
         /* validate trellis */
@@ -375,7 +375,7 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     *got_packet_ptr = 1;
     return 0;
 }

libavcodec/libfaac.c
@@ -157,7 +157,7 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
         goto error;
     }
-    avctx->delay = FAAC_DELAY_SAMPLES;
+    avctx->initial_padding = FAAC_DELAY_SAMPLES;
     ff_af_queue_init(avctx, &s->afq);
     return 0;

libavcodec/libfdk-aacenc.c
@@ -286,7 +286,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     }
     avctx->frame_size = info.frameLength;
-    avctx->delay = info.encoderDelay;
+    avctx->initial_padding = info.encoderDelay;
     ff_af_queue_init(avctx, &s->afq);
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {

libavcodec/libmp3lame.c
@@ -137,7 +137,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
     }
     /* get encoder delay */
-    avctx->delay = lame_get_encoder_delay(s->gfp) + 528 + 1;
+    avctx->initial_padding = lame_get_encoder_delay(s->gfp) + 528 + 1;
     ff_af_queue_init(avctx, &s->afq);
     avctx->frame_size = lame_get_framesize(s->gfp);

libavcodec/libopencore-amr.c
@@ -200,7 +200,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
     }
     avctx->frame_size = 160;
-    avctx->delay = 50;
+    avctx->initial_padding = 50;
     ff_af_queue_init(avctx, &s->afq);
     s->enc_state = Encoder_Interface_init(s->enc_dtx);
@@ -250,7 +250,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 return AVERROR(ENOMEM);
             memcpy(flush_buf, samples, frame->nb_samples * sizeof(*flush_buf));
             samples = flush_buf;
-            if (frame->nb_samples < avctx->frame_size - avctx->delay)
+            if (frame->nb_samples < avctx->frame_size - avctx->initial_padding)
                 s->enc_last_frame = -1;
         }
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) {

libavcodec/libopusenc.c
@@ -87,7 +87,7 @@ static void libopus_write_header(AVCodecContext *avctx, int stream_count,
     bytestream_put_buffer(&p, "OpusHead", 8);
     bytestream_put_byte(&p, 1); /* Version */
     bytestream_put_byte(&p, channels);
-    bytestream_put_le16(&p, avctx->delay); /* Lookahead samples at 48kHz */
+    bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
     bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
     bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
@@ -277,7 +277,7 @@ static int av_cold libopus_encode_init(AVCodecContext *avctx)
         goto fail;
     }
-    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->delay));
+    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
     if (ret != OPUS_OK)
         av_log(avctx, AV_LOG_WARNING,
                "Unable to get number of lookahead samples: %s\n",

libavcodec/libspeexenc.c
@@ -235,7 +235,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->header.frames_per_packet = s->frames_per_packet;
     /* set encoding delay */
-    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
+    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->initial_padding);
     ff_af_queue_init(avctx, &s->afq);
     /* create header packet bytes from header struct */

libavcodec/libtwolame.c
@@ -60,7 +60,7 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
     int ret;
     avctx->frame_size = TWOLAME_SAMPLES_PER_FRAME;
-    avctx->delay = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
     s->glopts = twolame_init();
     if (!s->glopts)
@@ -151,7 +151,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
     if (frame) {
         if (frame->pts != AV_NOPTS_VALUE)
-            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     } else {
         avpkt->pts = s->next_pts;
     }

libavcodec/libvo-aacenc.c
@@ -61,7 +61,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     int index, ret;
     avctx->frame_size = FRAME_SIZE;
-    avctx->delay = ENC_DELAY;
+    avctx->initial_padding = ENC_DELAY;
     s->last_frame = 2;
     ff_af_queue_init(avctx, &s->afq);

libavcodec/libvo-amrwbenc.c
@@ -93,7 +93,7 @@ static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
     s->last_bitrate = avctx->bit_rate;
     avctx->frame_size = 320;
-    avctx->delay = 80;
+    avctx->initial_padding = 80;
     s->state = E_IF_init();
@@ -131,7 +131,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     avpkt->size = size;
     *got_packet_ptr = 1;

libavcodec/libvorbis.c
@@ -322,8 +322,8 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (duration > 0) {
         /* we do not know encoder delay until we get the first packet from
          * libvorbis, so we have to update the AudioFrameQueue counts */
-        if (!avctx->delay) {
-            avctx->delay = duration;
+        if (!avctx->initial_padding) {
+            avctx->initial_padding = duration;
             s->afq.remaining_delay += duration;
             s->afq.remaining_samples += duration;
         }

libavcodec/mpegaudioenc.c
@@ -84,7 +84,7 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
     bitrate = bitrate / 1000;
     s->nb_channels = channels;
     avctx->frame_size = MPA_FRAME_SIZE;
-    avctx->delay = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
     /* encoding freq */
     s->lsf = 0;
@@ -735,7 +735,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     encode_frame(s, bit_alloc, padding);
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     avpkt->size = put_bits_count(&s->pb) / 8;
     *got_packet_ptr = 1;

libavcodec/nellymoserenc.c
@@ -165,7 +165,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
     avctx->frame_size = NELLY_SAMPLES;
-    avctx->delay = NELLY_BUF_LEN;
+    avctx->initial_padding = NELLY_BUF_LEN;
     ff_af_queue_init(avctx, &s->afq);
     s->avctx = avctx;
     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)

libavcodec/ra144enc.c
@@ -56,7 +56,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
         return -1;
     }
     avctx->frame_size = NBLOCKS * BLOCKSIZE;
-    avctx->delay = avctx->frame_size;
+    avctx->initial_padding = avctx->frame_size;
     avctx->bit_rate = 8000;
     ractx = avctx->priv_data;
     ractx->lpc_coef[0] = ractx->lpc_tables[0];

libavcodec/utils.c
@@ -1240,6 +1240,11 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         }
     }
+#if FF_API_AUDIOENC_DELAY
+    if (av_codec_is_encoder(avctx->codec))
+        avctx->delay = avctx->initial_padding;
+#endif
+
     if (av_codec_is_decoder(avctx->codec)) {
         /* validate channel layout from the decoder */
         if (avctx->channel_layout) {
@@ -1447,6 +1452,10 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 end:
     av_frame_free(&padded_frame);
+#if FF_API_AUDIOENC_DELAY
+    avctx->delay = avctx->initial_padding;
+#endif
+
     return ret;
 }
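
The two FF_API_AUDIOENC_DELAY blocks above keep the deprecated field populated
during the transition. A sketch of how downstream code might cope with both
old and new libavcodec versions (illustrative only, not part of the commit):

#include <libavcodec/avcodec.h>

/* Prefer initial_padding where it exists (lavc >= 56.3.0, per this commit);
 * fall back to the old meaning of delay on earlier versions. */
static int encoder_priming_samples(const AVCodecContext *enc)
{
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(56, 3, 0)
    return enc->initial_padding;
#else
    return enc->delay;
#endif
}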

libavcodec/version.h
@@ -29,8 +29,8 @@
 #include "libavutil/version.h"
 #define LIBAVCODEC_VERSION_MAJOR 56
-#define LIBAVCODEC_VERSION_MINOR 2
-#define LIBAVCODEC_VERSION_MICRO 2
+#define LIBAVCODEC_VERSION_MINOR 3
+#define LIBAVCODEC_VERSION_MICRO 0
 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                               LIBAVCODEC_VERSION_MINOR, \
@@ -153,5 +153,8 @@
 #ifndef FF_API_AFD
 #define FF_API_AFD (LIBAVCODEC_VERSION_MAJOR < 57)
 #endif
+#ifndef FF_API_AUDIOENC_DELAY
+#define FF_API_AUDIOENC_DELAY (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
 #endif /* AVCODEC_VERSION_H */

libavcodec/wmaenc.c
@@ -92,8 +92,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avctx->block_align = block_align;
     avctx->bit_rate = avctx->block_align * 8LL * avctx->sample_rate /
                       s->frame_len;
-    avctx->frame_size =
-    avctx->delay = s->frame_len;
+    avctx->frame_size = avctx->initial_padding = s->frame_len;
     return 0;
 }
@@ -420,7 +419,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
     flush_put_bits(&s->pb);
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     avpkt->size = avctx->block_align;
     *got_packet_ptr = 1;
