From 136e19e1cf4fb1bc7b35e21357dd75c759b8b35e Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 2 Nov 2010 08:32:04 +0000 Subject: [PATCH] Add single stream LATM/LOAS decoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decoder is just a wrapper around the AAC decoder. based on patch by Paul Kendall { paul <ät> kcbbs gen nz } Originally committed as revision 25642 to svn://svn.ffmpeg.org/ffmpeg/trunk --- Changelog | 1 + configure | 1 + libavcodec/Makefile | 1 + libavcodec/aacdec.c | 279 +++++++++++++++++++++++++++++++++++++++ libavcodec/allcodecs.c | 2 + libavcodec/avcodec.h | 3 +- libavcodec/latm_parser.c | 119 +++++++++++++++++ 7 files changed, 405 insertions(+), 1 deletion(-) create mode 100644 libavcodec/latm_parser.c diff --git a/Changelog b/Changelog index 1289036a55..979e7133cb 100644 --- a/Changelog +++ b/Changelog @@ -50,6 +50,7 @@ version : - transpose filter added - ffmpeg -force_key_frames option added - demuxer for receiving raw rtp:// URLs without an SDP description +- single stream LATM/LOAS decoder version 0.6: diff --git a/configure b/configure index 27f807a56b..3ad8aabc6f 100755 --- a/configure +++ b/configure @@ -1188,6 +1188,7 @@ rdft_select="fft" # decoders / encoders / hardware accelerators aac_decoder_select="mdct rdft" aac_encoder_select="mdct" +aac_latm_decoder_select="aac_decoder aac_latm_parser" ac3_decoder_select="mdct ac3_parser" alac_encoder_select="lpc" amrnb_decoder_select="lsp" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 385ae027dd..5abc4954f4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -576,6 +576,7 @@ OBJS-$(CONFIG_H264_PARSER) += h264_parser.o h264.o \ h264_loopfilter.o h264_cabac.o \ h264_cavlc.o h264_ps.o \ mpegvideo.o error_resilience.o +OBJS-$(CONFIG_AAC_LATM_PARSER) += latm_parser.o OBJS-$(CONFIG_MJPEG_PARSER) += mjpeg_parser.o OBJS-$(CONFIG_MLP_PARSER) += mlp_parser.o mlp.o OBJS-$(CONFIG_MPEG4VIDEO_PARSER) += mpeg4video_parser.o h263.o 
\
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 7c23bdf0cb..f3073c6023 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -3,6 +3,10 @@
  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall
+ * Copyright (c) 2010 Janne Grunau
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -2098,6 +2102,305 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
     return 0;
 }
 
+
+#define LOAS_SYNC_WORD   0x2b7     ///< 11 bits LOAS sync word
+
+struct LATMContext {
+    AACContext      aac_ctx;             ///< containing AACContext
+    int             initialized;         ///< initialized after a valid extradata was seen
+
+    // parser data
+    int             audio_mux_version_A; ///< LATM syntax version
+    int             frame_length_type;   ///< 0/1 variable/fixed frame length
+    int             frame_length;        ///< frame length for fixed frame length
+};
+
+/**
+ * Read a LATM variable length value: a 2 bit field holding the byte count
+ * minus one, followed by that many bytes.
+ */
+static inline uint32_t latm_get_value(GetBitContext *b)
+{
+    int length = get_bits(b, 2);
+
+    return get_bits_long(b, (length+1)*8);
+}
+
+/**
+ * Parse the AudioSpecificConfig at the current read position, keep a padded
+ * copy of it in avctx->extradata and advance gb past it.
+ * The config must start on a byte boundary.
+ *
+ * @return number of bits consumed by the config, negative error code on failure
+ */
+static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
+                                             GetBitContext *gb)
+{
+    AVCodecContext *avctx = latmctx->aac_ctx.avctx;
+    MPEG4AudioConfig m4ac;
+    int  config_start_bit = get_bits_count(gb);
+    int     bits_consumed, esize;
+
+    if (config_start_bit % 8) {
+        av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific "
+                               "config not byte aligned.\n", 1);
+        return AVERROR_INVALIDDATA;
+    } else {
+        bits_consumed =
+            decode_audio_specific_config(NULL, avctx, &m4ac,
+                                         gb->buffer + (config_start_bit / 8),
+                                         get_bits_left(gb) / 8);
+
+        if (bits_consumed < 0)
+            return AVERROR_INVALIDDATA;
+
+        esize = (bits_consumed+7) / 8;
+
+        if (avctx->extradata_size <= esize) {
+            av_free(avctx->extradata);
+            avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!avctx->extradata)
+                return AVERROR(ENOMEM);
+        }
+
+        avctx->extradata_size = esize;
+        memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
+        memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+
+        // decode_audio_specific_config() parsed the raw buffer and did not
+        // move gb; advance it so the caller resumes right after the config
+        skip_bits_long(gb, bits_consumed);
+    }
+
+    return bits_consumed;
+}
+
+/**
+ * Parse a StreamMuxConfig and store the frame length signalling in latmctx.
+ * Streams with more than one program or layer are rejected.
+ *
+ * @return 0 on success, negative error code on failure
+ */
+static int read_stream_mux_config(struct LATMContext *latmctx,
+                                  GetBitContext *gb)
+{
+    int ret, audio_mux_version = get_bits(gb, 1);
+
+    latmctx->audio_mux_version_A = 0;
+    if (audio_mux_version)
+        latmctx->audio_mux_version_A = get_bits(gb, 1);
+
+    if (!latmctx->audio_mux_version_A) {
+
+        if (audio_mux_version)
+            latm_get_value(gb);                 // taraFullness
+
+        skip_bits(gb, 1);                       // allStreamSameTimeFraming
+        skip_bits(gb, 6);                       // numSubFrames
+        // numPrograms
+        if (get_bits(gb, 4)) {                  // numPrograms
+            av_log_missing_feature(latmctx->aac_ctx.avctx,
+                                   "multiple programs are not supported\n", 1);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        // for each program (which there is only one in DVB)
+
+        // for each layer (which there is only one in DVB)
+        if (get_bits(gb, 3)) {                   // numLayer
+            av_log_missing_feature(latmctx->aac_ctx.avctx,
+                                   "multiple layers are not supported\n", 1);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        // for all but first stream: use_same_config = get_bits(gb, 1);
+        if (!audio_mux_version) {
+            if ((ret = latm_decode_audio_specific_config(latmctx, gb)) < 0)
+                return ret;
+        } else {
+            int ascLen = latm_get_value(gb);
+            if ((ret = latm_decode_audio_specific_config(latmctx, gb)) < 0)
+                return ret;
+            ascLen -= ret;
+            skip_bits_long(gb, ascLen);
+        }
+
+        latmctx->frame_length_type = get_bits(gb, 3);
+        switch (latmctx->frame_length_type) {
+        case 0:
+            skip_bits(gb, 8);       // latmBufferFullness
+            break;
+        case 1:
+            latmctx->frame_length = get_bits(gb, 9);
+            break;
+        case 3:
+        case 4:
+        case 5:
+            skip_bits(gb, 6);       // CELP frame length table index
+            break;
+        case 6:
+        case 7:
+            skip_bits(gb, 1);       // HVXC frame length table index
+            break;
+        }
+
+        if (get_bits(gb, 1)) {                  // other data
+            if (audio_mux_version) {
+                latm_get_value(gb);             // other_data_bits
+            } else {
+                int esc;
+                do {
+                    esc = get_bits(gb, 1);
+                    skip_bits(gb, 8);
+                } while (esc);
+            }
+        }
+
+        if (get_bits(gb, 1))                     // crc present
+            skip_bits(gb, 8);                    // config_crc
+    }
+
+    return 0;
+}
+
+/**
+ * Read the payload length information for the current frame.
+ *
+ * @return payload length as signalled or configured, 0 for frame length
+ *         types that carry no explicit length
+ */
+static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
+{
+    uint8_t tmp;
+
+    if (ctx->frame_length_type == 0) {
+        int mux_slot_length = 0;
+        do {
+            tmp = get_bits(gb, 8);
+            mux_slot_length += tmp;
+        } while (tmp == 255);
+        return mux_slot_length;
+    } else if (ctx->frame_length_type == 1) {
+        return ctx->frame_length;
+    } else if (ctx->frame_length_type == 3 ||
+               ctx->frame_length_type == 5 ||
+               ctx->frame_length_type == 7) {
+        skip_bits(gb, 2);          // mux_slot_length_coded
+    }
+    return 0;
+}
+
+/**
+ * Parse an AudioMuxElement up to the start of the audio payload.
+ *
+ * @return 0 on success, AVERROR(EAGAIN) while no decoder config is available,
+ *         another negative error code on invalid data
+ */
+static int read_audio_mux_element(struct LATMContext *latmctx,
+                                  GetBitContext *gb)
+{
+    int err;
+    uint8_t use_same_mux = get_bits(gb, 1);
+    if (!use_same_mux) {
+        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
+            return err;
+    } else if (!latmctx->aac_ctx.avctx->extradata) {
+        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
+               "no decoder config found\n");
+        return AVERROR(EAGAIN);
+    }
+    if (latmctx->audio_mux_version_A == 0) {
+        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
+        if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
+            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
+            return AVERROR_INVALIDDATA;
+        } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
+            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
+                   "frame length mismatch %d << %d\n",
+                   mux_slot_length_bytes * 8, get_bits_left(gb));
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    return 0;
+}
+
+
+/**
+ * Decode one LOAS frame: check the sync word and length, parse the mux
+ * element and hand the payload to the AAC decoder.
+ *
+ * @return number of bytes consumed from avpkt, negative error code on failure
+ */
+static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
+                             AVPacket *avpkt)
+{
+    struct LATMContext *latmctx = avctx->priv_data;
+    int                 muxlength, err;
+    GetBitContext       gb;
+
+    if (avpkt->size == 0)
+        return 0;
+
+    init_get_bits(&gb, avpkt->data, avpkt->size * 8);
+
+    // check for LOAS sync word
+    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
+        return AVERROR_INVALIDDATA;
+
+    // the 13 bit length does not count the 3 byte LOAS header
+    muxlength = get_bits(&gb, 13) + 3;
+    // not enough data, the parser should have sorted this out
+    if (muxlength > avpkt->size)
+        return AVERROR_INVALIDDATA;
+
+    if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
+        return err;
+
+    if (!latmctx->initialized) {
+        if (!avctx->extradata) {
+            *out_size = 0;
+            return avpkt->size;
+        } else {
+            if ((err = aac_decode_init(avctx)) < 0)
+                return err;
+            latmctx->initialized = 1;
+        }
+    }
+
+    if (show_bits(&gb, 12) == 0xfff) {
+        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
+               "ADTS header detected, probably as result of configuration "
+               "misparsing\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((err = aac_decode_frame_int(avctx, out, out_size, &gb)) < 0)
+        return err;
+
+    return muxlength;
+}
+
+/**
+ * Initialize the wrapped AAC decoder. The LATM context is only marked as
+ * initialized when extradata was supplied and the AAC init succeeded.
+ */
+av_cold static int latm_decode_init(AVCodecContext *avctx)
+{
+    struct LATMContext *latmctx = avctx->priv_data;
+    int ret;
+
+    ret = aac_decode_init(avctx);
+
+    if (avctx->extradata_size > 0) {
+        latmctx->initialized = !ret;
+    } else {
+        latmctx->initialized = 0;
+    }
+
+    return ret;
+}
+
+
 AVCodec aac_decoder = {
     "aac",
     AVMEDIA_TYPE_AUDIO,
@@ -2113,3 +2416,23 @@ AVCodec aac_decoder = {
     },
     .channel_layouts = aac_channel_layout,
 };
+
+/*
+    Note: This decoder filter is intended to decode LATM streams transferred
+    in MPEG transport streams which only contain one program.
+    To do a more complex LATM demuxing a separate LATM demuxer should be used.
+*/
+AVCodec aac_latm_decoder = {
+    .name = "aac_latm",
+    .type = AVMEDIA_TYPE_AUDIO,
+    .id   = CODEC_ID_AAC_LATM,
+    .priv_data_size = sizeof(struct LATMContext),
+    .init   = latm_decode_init,
+    .close  = aac_decode_close,
+    .decode = latm_decode_frame,
+    .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
+    .sample_fmts = (const enum SampleFormat[]) {
+        SAMPLE_FMT_S16,SAMPLE_FMT_NONE
+    },
+    .channel_layouts = aac_channel_layout,
+};
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 89614ab7d7..c3e4647c8c 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -220,6 +220,7 @@ void avcodec_register_all(void)
 
     /* audio codecs */
     REGISTER_ENCDEC  (AAC, aac);
+    REGISTER_DECODER (AAC_LATM, aac_latm);
     REGISTER_ENCDEC  (AC3, ac3);
     REGISTER_ENCDEC  (ALAC, alac);
     REGISTER_DECODER (ALS, als);
@@ -366,6 +367,7 @@ void avcodec_register_all(void)
 
     /* parsers */
     REGISTER_PARSER  (AAC, aac);
+    REGISTER_PARSER  (AAC_LATM, aac_latm);
     REGISTER_PARSER  (AC3, ac3);
     REGISTER_PARSER  (CAVSVIDEO, cavsvideo);
     REGISTER_PARSER  (DCA, dca);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 705259ed2a..96d7987bdd 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -31,7 +31,7 @@
 #include "libavutil/cpu.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 93
+#define LIBAVCODEC_VERSION_MINOR 94
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -376,6 +376,7 @@ enum CodecID {
     CODEC_ID_ATRAC1,
     CODEC_ID_BINKAUDIO_RDFT,
     CODEC_ID_BINKAUDIO_DCT,
+    CODEC_ID_AAC_LATM,
 
     /* subtitle codecs */
     CODEC_ID_DVD_SUBTITLE= 0x17000,
diff --git a/libavcodec/latm_parser.c b/libavcodec/latm_parser.c
new file mode 100644
index 0000000000..b8b67ef546
--- /dev/null
+++ b/libavcodec/latm_parser.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2008 Paul Kendall
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC LATM parser: splits a LOAS byte stream into frames using the 11 bit
+ * sync word and the 13 bit length field that follows it
+ */
+
+#include /* NOTE(review): the header names of these five includes are missing here (presumably lost in extraction) - restore them before applying */
+#include
+#include
+#include
+#include
+
+#include "parser.h"
+
+#define LATM_HEADER     0x56e000        // sync word 0x2b7 (11 bits) shifted into the top of a 24 bit header
+#define LATM_MASK       0xFFE000        // top 11 bits of the 24 bit header: sync word
+#define LATM_SIZE_MASK  0x001FFF        // bottom 13 bits of the 24 bit header: frame length
+
+typedef struct LATMParseContext{
+    ParseContext pc;
+    int count;          ///< input bytes seen since the end of the frame header
+} LATMParseContext;
+
+/**
+ * Find the end of the current frame in the bitstream.
+ * @return offset in buf of the first byte of the next frame, 0 at EOF with a frame pending, otherwise END_NOT_FOUND
+ */
+static int latm_find_frame_end(AVCodecParserContext *s1, const uint8_t *buf,
+                               int buf_size)
+{
+    LATMParseContext *s = s1->priv_data;
+    ParseContext *pc    = &s->pc;
+    int pic_found, i;
+    uint32_t state;
+
+    pic_found = pc->frame_start_found;  /* resume a header search or a pending frame from the previous call */
+    state     = pc->state;
+
+    i = 0;
+    if (!pic_found) {
+        for (i = 0; i < buf_size; i++) {
+            state = (state<<8) | buf[i];    /* sliding window over the most recent bytes */
+            if ((state & LATM_MASK) == LATM_HEADER) {   /* sync word sits in the last 3 bytes read */
+                i++;                /* step past the last header byte */
+                s->count = -i;      /* the header ends i bytes into this buffer */
+                pic_found = 1;
+                break;
+            }
+        }
+    }
+
+    if (pic_found) {
+        /* EOF considered as end of frame */
+        if (buf_size == 0)
+            return 0;
+        if ((state & LATM_SIZE_MASK) - s->count <= buf_size) {   /* the frame ends inside this buffer */
+            pc->frame_start_found = 0;
+            pc->state = -1;         /* restart the header search for the next frame */
+            return (state & LATM_SIZE_MASK) - s->count;     /* length field minus the bytes already seen */
+        }
+    }
+
+    s->count += buf_size;           /* account for this buffer; only meaningful once a header was found */
+    pc->frame_start_found = pic_found;
+    pc->state = state;
+
+    return END_NOT_FOUND;
+}
+/* Parser callback: outputs one frame per call, or no data while ff_combine_frame() reports failure. */
+static int latm_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                      const uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    LATMParseContext *s = s1->priv_data;
+    ParseContext *pc    = &s->pc;
+    int next;
+
+    if (s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;            /* input is flagged as complete frames: pass it through unchanged */
+    } else {
+        next = latm_find_frame_end(s1, buf, buf_size);
+
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {   /* nothing to output yet */
+            *poutbuf      = NULL;
+            *poutbuf_size = 0;
+            return buf_size;        /* report the whole input as consumed */
+        }
+    }
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+/* Parser registration for CODEC_ID_AAC_LATM; no init callback is set. */
+AVCodecParser aac_latm_parser = {
+    { CODEC_ID_AAC_LATM },
+    sizeof(LATMParseContext),
+    NULL,
+    latm_parse,
+    ff_parse_close
+};