From e1f41b4fdc5d3e8813d8f9d4d81899bd4e6df31b Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 16 Apr 2021 20:53:55 +0200 Subject: [PATCH] avformat/wavdec: Fix reading files with id3v2 apic before fmt tag Up until now the cover images will get the stream index 0 in this case, violating the hardcoded assumption that this is the index of the audio stream. Fix this by creating the audio stream first; this is also in line with the expectations of ff_pcm_read_seek() and ff_spdif_read_packet(). It also simplifies the code to parse the fmt and xma2 tags. Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8. Signed-off-by: Andreas Rheinhardt --- libavformat/wavdec.c | 78 ++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c index aca9a8382b..e11535b6ae 100644 --- a/libavformat/wavdec.c +++ b/libavformat/wavdec.c @@ -49,6 +49,7 @@ typedef struct WAVDemuxContext { const AVClass *class; int64_t data_end; int w64; + AVStream *vst; int64_t smv_data_ofs; int smv_block_size; int smv_frames_per_jpeg; @@ -170,30 +171,26 @@ static void handle_stream_probing(AVStream *st) } } -static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st) +static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream *st) { AVIOContext *pb = s->pb; WAVDemuxContext *wav = s->priv_data; int ret; /* parse fmt header */ - *st = avformat_new_stream(s, NULL); - if (!*st) - return AVERROR(ENOMEM); - - ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx); + ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx); if (ret < 0) return ret; - handle_stream_probing(*st); + handle_stream_probing(st); - (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW; + st->need_parsing = AVSTREAM_PARSE_FULL_RAW; - avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate); + avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); return 0; } -static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st) +static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream *st) { AVIOContext *pb = s->pb; int version, num_streams, i, channels = 0, ret; @@ -201,13 +198,9 @@ static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st) if (size < 36) return AVERROR_INVALIDDATA; - *st = avformat_new_stream(s, NULL); - if (!*st) - return AVERROR(ENOMEM); - - (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; - (*st)->codecpar->codec_id = AV_CODEC_ID_XMA2; - (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW; + st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + st->codecpar->codec_id = AV_CODEC_ID_XMA2; + st->need_parsing = AVSTREAM_PARSE_FULL_RAW; version = avio_r8(pb); if (version != 3 && version != 4) @@ -216,26 +209,26 @@ static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st) if (size != (32 + ((version==3)?0:8) + 4*num_streams)) return AVERROR_INVALIDDATA; avio_skip(pb, 10); - (*st)->codecpar->sample_rate = avio_rb32(pb); + st->codecpar->sample_rate = avio_rb32(pb); if (version == 4) avio_skip(pb, 8); avio_skip(pb, 4); - (*st)->duration = avio_rb32(pb); + st->duration = avio_rb32(pb); avio_skip(pb, 8); for (i = 0; i < num_streams; i++) { channels += avio_r8(pb); avio_skip(pb, 3); } - (*st)->codecpar->channels = channels; + st->codecpar->channels = channels; - if ((*st)->codecpar->channels <= 0 || (*st)->codecpar->sample_rate <= 0) + if (st->codecpar->channels <= 0 || st->codecpar->sample_rate <= 0) return AVERROR_INVALIDDATA; - avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate); + avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); avio_seek(pb, -size, SEEK_CUR); - if ((ret = ff_get_extradata(s, (*st)->codecpar, pb, size)) < 0) + if ((ret = ff_get_extradata(s, st->codecpar, pb, size)) < 0) return ret; return 0; @@ -407,6 +400,11 @@ static int wav_read_header(AVFormatContext *s) } + /* Create the audio stream now so that its index is always zero */ + st = avformat_new_stream(s, NULL); + if (!st) + return AVERROR(ENOMEM); + for (;;) { AVStream *vst; size = next_tag(pb, &tag, wav->rifx); @@ -418,7 +416,7 @@ static int wav_read_header(AVFormatContext *s) switch (tag) { case MKTAG('f', 'm', 't', ' '): /* only parse the first 'fmt ' tag found */ - if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) { + if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, st)) < 0) { return ret; } else if (got_fmt) av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n"); @@ -427,7 +425,7 @@ static int wav_read_header(AVFormatContext *s) break; case MKTAG('X', 'M', 'A', '2'): /* only parse the first 'XMA2' tag found */ - if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, &st)) < 0) { + if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, st)) < 0) { return ret; } else if (got_xma2) av_log(s, AV_LOG_WARNING, "found more than one 'XMA2' tag\n"); @@ -484,6 +482,7 @@ static int wav_read_header(AVFormatContext *s) vst = avformat_new_stream(s, NULL); if (!vst) return AVERROR(ENOMEM); + wav->vst = vst; avio_r8(pb); vst->id = 1; vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; @@ -693,23 +692,24 @@ static int wav_read_packet(AVFormatContext *s, AVPacket *pkt) { int ret, size; int64_t left; - AVStream *st; WAVDemuxContext *wav = s->priv_data; + AVStream *st = s->streams[0]; if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1) return ff_spdif_read_packet(s, pkt); if (wav->smv_data_ofs > 0) { int64_t audio_dts, video_dts; + AVStream *vst = wav->vst; smv_retry: - audio_dts = (int32_t)s->streams[0]->cur_dts; - video_dts = (int32_t)s->streams[1]->cur_dts; + audio_dts = (int32_t)st->cur_dts; + video_dts = (int32_t)vst->cur_dts; if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE) { /*We always return a video frame first to get the pixel format first*/ wav->smv_last_stream = wav->smv_given_first ? - av_compare_ts(video_dts, s->streams[1]->time_base, - audio_dts, s->streams[0]->time_base) > 0 : 0; + av_compare_ts(video_dts, vst->time_base, + audio_dts, st->time_base) > 0 : 0; wav->smv_given_first = 1; } wav->smv_last_stream = !wav->smv_last_stream; @@ -732,7 +732,7 @@ smv_retry: pkt->duration = wav->smv_frames_per_jpeg; wav->smv_block++; - pkt->stream_index = 1; + pkt->stream_index = vst->index; smv_out: avio_seek(s->pb, old_pos, SEEK_SET); if (ret == AVERROR_EOF) { @@ -743,8 +743,6 @@ smv_out: } } - st = s->streams[0]; - left = wav->data_end - avio_tell(s->pb); if (wav->ignore_length) left = INT_MAX; @@ -781,22 +779,24 @@ static int wav_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags) { WAVDemuxContext *wav = s->priv_data; - AVStream *st; + AVStream *ast = s->streams[0], *vst = wav->vst; wav->smv_eof = 0; wav->audio_eof = 0; + + if (stream_index != 0 && (!vst || stream_index != vst->index)) + return AVERROR(EINVAL); if (wav->smv_data_ofs > 0) { int64_t smv_timestamp = timestamp; if (stream_index == 0) - smv_timestamp = av_rescale_q(timestamp, s->streams[0]->time_base, s->streams[1]->time_base); + smv_timestamp = av_rescale_q(timestamp, ast->time_base, vst->time_base); else - timestamp = av_rescale_q(smv_timestamp, s->streams[1]->time_base, s->streams[0]->time_base); + timestamp = av_rescale_q(smv_timestamp, vst->time_base, ast->time_base); if (wav->smv_frames_per_jpeg > 0) { wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg; } } - st = s->streams[0]; - switch (st->codecpar->codec_id) { + switch (ast->codecpar->codec_id) { case AV_CODEC_ID_MP2: case AV_CODEC_ID_MP3: case AV_CODEC_ID_AC3: @@ -807,7 +807,7 @@ static int wav_read_seek(AVFormatContext *s, default: break; } - return ff_pcm_read_seek(s, stream_index, timestamp, flags); + return ff_pcm_read_seek(s, 0, timestamp, flags); } static const AVClass wav_demuxer_class = {