From 101036adb9ad143c1da8db41c2dd89eaf6d32deb Mon Sep 17 00:00:00 2001 From: Ivan Schreter Date: Sat, 22 Aug 2009 16:05:43 +0000 Subject: [PATCH] Support for generic multi-stream key frame finding for new seek API. Originally committed as revision 19680 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavformat/Makefile | 2 +- libavformat/seek.c | 578 +++++++++++++++++++++++++++++++++++++++++++ libavformat/seek.h | 97 ++++++++ libavformat/utils.c | 2 +- 4 files changed, 677 insertions(+), 2 deletions(-) create mode 100644 libavformat/seek.c create mode 100644 libavformat/seek.h diff --git a/libavformat/Makefile b/libavformat/Makefile index e4aa53abd0..89b6ce2a45 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -5,7 +5,7 @@ FFLIBS = avcodec avutil HEADERS = avformat.h avio.h -OBJS = allformats.o cutils.o metadata.o metadata_compat.o options.o os_support.o sdp.o utils.o +OBJS = allformats.o cutils.o metadata.o metadata_compat.o options.o os_support.o sdp.o seek.o utils.o # muxers/demuxers OBJS-$(CONFIG_AAC_DEMUXER) += raw.o id3v1.o id3v2.o diff --git a/libavformat/seek.c b/libavformat/seek.c new file mode 100644 index 0000000000..2f4595304d --- /dev/null +++ b/libavformat/seek.c @@ -0,0 +1,578 @@ +/* + * Utility functions for seeking for use within FFmpeg format handlers. + * + * Copyright (c) 2009 Ivan Schreter + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "seek.h" +#include "libavutil/mem.h" + +// NOTE: implementation should be moved here in another patch, to keep patches +// separated. +extern void av_read_frame_flush(AVFormatContext *s); + +/** + * Helper structure to store parser state of AVStream. + * + * Filled by ff_store_parser_state() and copied back by ff_restore_parser_state(). + */ +typedef struct AVStreamState { + // Saved members of AVStream + AVCodecParserContext *parser; + AVPacket cur_pkt; + int64_t last_IP_pts; + int64_t cur_dts; + int64_t reference_dts; + const uint8_t *cur_ptr; + int cur_len; + int probe_packets; +} AVStreamState; + +/** + * Helper structure to store parser state of AVFormatContext. + */ +struct AVParserState { + int64_t fpos; ///< File position at the time of call. + + // Saved members of AVFormatContext + AVStream *cur_st; ///< Current stream. + AVPacketList *packet_buffer; ///< Packet buffer of original state. + AVPacketList *raw_packet_buffer; ///< Raw packet buffer of original state. + int raw_packet_buffer_remaining_size; ///< Remaining size available for raw_packet_buffer. + + // Saved info for streams. + int nb_streams; ///< Number of streams with stored state. + AVStreamState *stream_states; ///< States of individual streams (array). +}; + +/** + * Helper structure describing keyframe search state of one stream. + */ +typedef struct { + int64_t pos_lo; ///< Position of the frame with low timestamp in file or INT64_MAX if not found (yet). + int64_t ts_lo; ///< Frame presentation timestamp or same as pos_lo for byte seeking. + + int64_t pos_hi; ///< Position of the frame with high timestamp in file, INT64_MAX if not found (yet) or INT64_MAX - 1 if no such frame exists. + int64_t ts_hi; ///< Frame presentation timestamp or same as pos_hi for byte seeking; INT64_MAX if no such frame exists. + + int64_t last_pos; ///< Last known position of a frame, for multi-frame packets.
+ + int64_t term_ts; ///< Termination timestamp (which TS we already read). + AVRational term_ts_tb; ///< Timebase for term_ts. + int64_t first_ts; ///< First packet timestamp in this iteration (to fill term_ts later). + AVRational first_ts_tb;///< Timebase for first_ts. + + int terminated; ///< Termination flag for current iteration. +} AVSyncPoint; + +/** + * Compare two timestamps exactly, taking into account their respective time bases. + * + * INT64_MIN and INT64_MAX are handled up front as the smallest/largest possible + * values and are compared without any scaling. + * + * @note The cross-multiplication is done in plain int64_t arithmetic, so very + * large timestamps or time base terms can overflow (TODO: confirm that + * callers stay within range). + * + * @param ts_a timestamp A. + * @param tb_a time base for timestamp A. + * @param ts_b timestamp B. + * @param tb_b time base for timestamp B. + * @return -1, 0 or 1 if timestamp A is less than, equal to or greater than timestamp B. + */ +static int compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b) +{ + int64_t a, b, res; + + if (ts_a == INT64_MIN) + return ts_a < ts_b ? -1 : 0; + if (ts_a == INT64_MAX) + return ts_a > ts_b ? 1 : 0; + if (ts_b == INT64_MIN) + return ts_a > ts_b ? 1 : 0; + if (ts_b == INT64_MAX) + return ts_a < ts_b ? -1 : 0; + + a = ts_a * tb_a.num * tb_b.den; + b = ts_b * tb_b.num * tb_a.den; + + res = a - b; + if (res == 0) + return 0; + else + return (res >> 63) | 1; +} + +/** + * Compute a distance between timestamps. + * + * Distances are only comparable if the same time bases are used for computing + * the distances. + * + * @note Unlike compare_ts(), INT64_MIN/INT64_MAX get no special treatment here + * and the products are plain int64_t arithmetic, so such inputs overflow. + * + * @param ts_hi high timestamp. + * @param tb_hi high timestamp time base. + * @param ts_lo low timestamp. + * @param tb_lo low timestamp time base. + * @return representation of distance between high and low timestamps. + */ +static int64_t ts_distance(int64_t ts_hi, AVRational tb_hi, int64_t ts_lo, AVRational tb_lo) +{ + int64_t hi, lo; + + hi = ts_hi * tb_hi.num * tb_lo.den; + lo = ts_lo * tb_lo.num * tb_hi.den; + + return hi - lo; +} + +/** + * Partial search for keyframes in multiple streams.
+ * + * This routine searches for the next lower and next higher timestamp to + * given target timestamp in each stream, starting at current file position + * and ending at position, where all streams have already been examined + * (or when all higher key frames found in first iteration). + * + * This routine is called iteratively with exponential backoff to find lower + * timestamp. + * + * Packets are only inspected: every packet read is freed again before the + * next one is requested, including packets of inactive (discarded) streams. + * + * @param s format context. + * @param timestamp target timestamp (or position, if AVSEEK_FLAG_BYTE). + * @param timebase time base for timestamps. + * @param flags seeking flags. + * @param sync array with information per stream. + * @param keyframes_to_find count of keyframes to find in total. + * @param found_lo pointer to count of already found low timestamp keyframes. + * @param found_hi pointer to count of already found high timestamp keyframes. + * @param first_iter flag for first iteration. + */ +static void search_hi_lo_keyframes(AVFormatContext *s, + int64_t timestamp, + AVRational timebase, + int flags, + AVSyncPoint *sync, + int keyframes_to_find, + int *found_lo, + int *found_hi, + int first_iter) +{ + AVPacket pkt; + AVSyncPoint *sp; + AVStream *st; + int idx; + int flg; + int terminated_count = 0; + int64_t pos; + int64_t pts, dts; // PTS/DTS from stream + int64_t ts; // PTS in stream-local time base or position for byte seeking + AVRational ts_tb; // Time base of the stream or 1:1 for byte seeking + + for (;;) { + if (av_read_frame(s, &pkt) < 0) { + // EOF or error, make sure high flags are set + for (idx = 0; idx < s->nb_streams; ++idx) { + if (s->streams[idx]->discard < AVDISCARD_ALL) { + sp = &sync[idx]; + if (sp->pos_hi == INT64_MAX) { + // No high frame exists for this stream + (*found_hi)++; + sp->ts_hi = INT64_MAX; + sp->pos_hi = INT64_MAX - 1; + } + } + } + break; + } + + idx = pkt.stream_index; + st = s->streams[idx]; + if (st->discard >= AVDISCARD_ALL) { + // This stream is not active: release the packet (it would leak otherwise) and skip it.
+ av_free_packet(&pkt); + continue; + } + sp = &sync[idx]; + + flg = pkt.flags; + pos = pkt.pos; + pts = pkt.pts; + dts = pkt.dts; + if (pts == AV_NOPTS_VALUE) { + // Some formats don't provide PTS, only DTS. + pts = dts; + } + av_free_packet(&pkt); + + // Multi-frame packets only return position for the very first frame. + // Other frames are read with position == -1. Therefore, we note down + // last known position of a frame and use it if a frame without + // position arrives. In this way, it's possible to seek to proper + // position. Additionally, for parsers not providing position at all, + // an approximation will be used (starting position of this iteration). + if (pos < 0) { + pos = sp->last_pos; + } else { + sp->last_pos = pos; + } + + // Evaluate key frames with known TS (or any frames, if AVSEEK_FLAG_ANY set). + if (pts != AV_NOPTS_VALUE && ((flg & PKT_FLAG_KEY) || (flags & AVSEEK_FLAG_ANY))) { + if (flags & AVSEEK_FLAG_BYTE) { + // For byte seeking, use position as timestamp. + ts = pos; + ts_tb.num = 1; + ts_tb.den = 1; + } else { + // Get stream time_base. + ts = pts; + ts_tb = st->time_base; + } + + if (sp->first_ts == AV_NOPTS_VALUE) { + // Note down termination timestamp for the next iteration - when + // we encounter a packet with the same timestamp, we will ignore + // any further packets for this stream in next iteration (as they + // are already evaluated). + sp->first_ts = ts; + sp->first_ts_tb = ts_tb; + } + + if (sp->term_ts != AV_NOPTS_VALUE && compare_ts(ts, ts_tb, sp->term_ts, sp->term_ts_tb) > 0) { + // We are past the end position from last iteration, ignore packet.
+ if (!sp->terminated) { + sp->terminated = 1; + ++terminated_count; + if (sp->pos_hi == INT64_MAX) { + // No high frame exists for this stream + (*found_hi)++; + sp->ts_hi = INT64_MAX; + sp->pos_hi = INT64_MAX - 1; + } + if (terminated_count == keyframes_to_find) + break; // all terminated, iteration done + } + continue; + } + + if (compare_ts(ts, ts_tb, timestamp, timebase) <= 0) { + // Keyframe found before target timestamp. + if (sp->pos_lo == INT64_MAX) { + // Found first keyframe lower than target timestamp. + (*found_lo)++; + sp->ts_lo = ts; + sp->pos_lo = pos; + } else if (sp->ts_lo < ts) { + // Found a better match (closer to target timestamp). + sp->ts_lo = ts; + sp->pos_lo = pos; + } + } + if (compare_ts(ts, ts_tb, timestamp, timebase) >= 0) { + // Keyframe found after target timestamp. + if (sp->pos_hi == INT64_MAX) { + // Found first keyframe higher than target timestamp. + (*found_hi)++; + sp->ts_hi = ts; + sp->pos_hi = pos; + if (*found_hi >= keyframes_to_find && first_iter) { + // We found high frame for all. They may get updated + // to TS closer to target TS in later iterations (which + // will stop at start position of previous iteration). + break; + } + } else if (sp->ts_hi > ts) { + // Found a better match (actually, shouldn't happen). + sp->ts_hi = ts; + sp->pos_hi = pos; + } + } + } + } + + // Clean up the parser.
+ av_read_frame_flush(s); +} + +/* + * Contract is documented in seek.h. Returns -1 if the helper array cannot be + * allocated, if no stream is active or if no candidate frame lies within + * [ts_min, ts_max]; otherwise the chosen file position, to which s->pb has + * been repositioned. + */ +int64_t ff_gen_syncpoint_search(AVFormatContext *s, + int stream_index, + int64_t pos, + int64_t ts_min, + int64_t ts, + int64_t ts_max, + int flags) +{ + AVSyncPoint *sync, *sp; + AVStream *st; + int i; + int keyframes_to_find = 0; + int64_t curpos; + int64_t step; + int found_lo = 0, found_hi = 0; + int64_t min_distance, distance; + int64_t min_pos = 0; + int first_iter = 1; + AVRational time_base; + + if (flags & AVSEEK_FLAG_BYTE) { + /* For byte seeking, we have exact 1:1 "timestamps" - positions */ + time_base.num = 1; + time_base.den = 1; + } else { + if (stream_index >= 0) { + /* We have a reference stream, whose time base we use */ + st = s->streams[stream_index]; + time_base = st->time_base; + } else { + /* No reference stream, use AV_TIME_BASE as reference time base */ + time_base.num = 1; + time_base.den = AV_TIME_BASE; + } + } + + // Initialize syncpoint structures for each stream. + sync = (AVSyncPoint*) av_malloc(s->nb_streams * sizeof(AVSyncPoint)); + if (!sync) { + // cannot allocate helper structure + return -1; + } + for (i = 0; i < s->nb_streams; ++i) { + st = s->streams[i]; + sp = &sync[i]; + + sp->pos_lo = INT64_MAX; + sp->ts_lo = INT64_MAX; + sp->pos_hi = INT64_MAX; + sp->ts_hi = INT64_MAX; + sp->terminated = 0; + sp->first_ts = AV_NOPTS_VALUE; + sp->term_ts = ts_max; + sp->term_ts_tb = time_base; + sp->last_pos = pos; + + st->cur_dts = AV_NOPTS_VALUE; + + if (st->discard < AVDISCARD_ALL) + ++keyframes_to_find; + } + + if (keyframes_to_find == 0) { + // No stream active, error. + av_free(sync); + return -1; + } + + // Find keyframes in all active streams with timestamp/position just before + // and just after requested timestamp/position.
+ step = 1024; + curpos = pos; + for (;;) { + url_fseek(s->pb, curpos, SEEK_SET); + search_hi_lo_keyframes(s, + ts, time_base, + flags, + sync, + keyframes_to_find, + &found_lo, &found_hi, + first_iter); + if (found_lo == keyframes_to_find && found_hi == keyframes_to_find) + break; // have all keyframes we wanted + if (curpos == 0) + break; // cannot go back anymore + + curpos = pos - step; + if (curpos < 0) + curpos = 0; + step *= 2; + + // switch termination positions + for (i = 0; i < s->nb_streams; ++i) { + st = s->streams[i]; + st->cur_dts = AV_NOPTS_VALUE; + + sp = &sync[i]; + if (sp->first_ts != AV_NOPTS_VALUE) { + sp->term_ts = sp->first_ts; + sp->term_ts_tb = sp->first_ts_tb; + sp->first_ts = AV_NOPTS_VALUE; + } + sp->terminated = 0; + sp->last_pos = curpos; + } + first_iter = 0; + } + + // Find actual position to start decoding so that decoder synchronizes + // closest to ts and between ts_min and ts_max. + pos = INT64_MAX; + + for (i = 0; i < s->nb_streams; ++i) { + st = s->streams[i]; + if (st->discard < AVDISCARD_ALL) { + // Time base of the stored ts_lo/ts_hi: they are byte positions (1:1) + // when seeking by byte and stream timestamps otherwise. + AVRational sp_tb = (flags & AVSEEK_FLAG_BYTE) ? time_base : st->time_base; + sp = &sync[i]; + min_distance = INT64_MAX; + // Find timestamp closest to requested timestamp within min/max limits.
+ if (sp->pos_lo != INT64_MAX + && compare_ts(ts_min, time_base, sp->ts_lo, sp_tb) <= 0 + && compare_ts(sp->ts_lo, sp_tb, ts_max, time_base) <= 0) { + // low timestamp is in range + min_distance = ts_distance(ts, time_base, sp->ts_lo, sp_tb); + min_pos = sp->pos_lo; + } + // ts_hi == INT64_MAX marks "no high frame exists"; it must not be used + // as a candidate (it would pass the range check for ts_max == INT64_MAX + // and overflow in ts_distance()). + if (sp->pos_hi != INT64_MAX + && sp->ts_hi != INT64_MAX + && compare_ts(ts_min, time_base, sp->ts_hi, sp_tb) <= 0 + && compare_ts(sp->ts_hi, sp_tb, ts_max, time_base) <= 0) { + // high timestamp is in range, check distance + distance = ts_distance(sp->ts_hi, sp_tb, ts, time_base); + if (distance < min_distance) { + min_distance = distance; + min_pos = sp->pos_hi; + } + } + if (min_distance == INT64_MAX) { + // no timestamp is in range, cannot seek + av_free(sync); + return -1; + } + if (min_pos < pos) + pos = min_pos; + } + } + + url_fseek(s->pb, pos, SEEK_SET); + av_free(sync); + return pos; +} + +AVParserState *ff_store_parser_state(AVFormatContext *s) +{ + int i; + AVStream *st; + AVStreamState *ss; + AVParserState *state = (AVParserState*) av_malloc(sizeof(AVParserState)); + if (!state) + return NULL; + + state->stream_states = (AVStreamState*) av_malloc(sizeof(AVStreamState) * s->nb_streams); + if (!state->stream_states) { + av_free(state); + return NULL; + } + + state->fpos = url_ftell(s->pb); + + // copy context structures + state->cur_st = s->cur_st; + state->packet_buffer = s->packet_buffer; + state->raw_packet_buffer = s->raw_packet_buffer; + state->raw_packet_buffer_remaining_size = s->raw_packet_buffer_remaining_size; + + s->cur_st = NULL; + s->packet_buffer = NULL; + s->raw_packet_buffer = NULL; + s->raw_packet_buffer_remaining_size = RAW_PACKET_BUFFER_SIZE; + + // copy stream structures + state->nb_streams = s->nb_streams; + for (i = 0; i < s->nb_streams; i++) { + st = s->streams[i]; + ss = &state->stream_states[i]; + + ss->parser = st->parser; + ss->last_IP_pts = st->last_IP_pts; + ss->cur_dts = st->cur_dts; + ss->reference_dts = st->reference_dts; + ss->cur_ptr
= st->cur_ptr; + ss->cur_len = st->cur_len; + ss->probe_packets = st->probe_packets; + ss->cur_pkt = st->cur_pkt; + + // Reset the live stream members; the saved values now live only in the stored state. + st->parser = NULL; + st->last_IP_pts = AV_NOPTS_VALUE; + st->cur_dts = AV_NOPTS_VALUE; + st->reference_dts = AV_NOPTS_VALUE; + st->cur_ptr = NULL; + st->cur_len = 0; + st->probe_packets = MAX_PROBE_PACKETS; + av_init_packet(&st->cur_pkt); + } + + return state; +} + +void ff_restore_parser_state(AVFormatContext *s, AVParserState *state) +{ + int i; + AVStream *st; + AVStreamState *ss; + // Flush the frame reader first; note that this also happens when state is NULL. + av_read_frame_flush(s); + + if (!state) + return; + + url_fseek(s->pb, state->fpos, SEEK_SET); + + // copy context structures + s->cur_st = state->cur_st; + s->packet_buffer = state->packet_buffer; + s->raw_packet_buffer = state->raw_packet_buffer; + s->raw_packet_buffer_remaining_size = state->raw_packet_buffer_remaining_size; + + // copy stream structures + // NOTE(review): s->streams is indexed with the stored stream count; this + // assumes no stream was removed since the state was stored - confirm. + for (i = 0; i < state->nb_streams; i++) { + st = s->streams[i]; + ss = &state->stream_states[i]; + + st->parser = ss->parser; + st->last_IP_pts = ss->last_IP_pts; + st->cur_dts = ss->cur_dts; + st->reference_dts = ss->reference_dts; + st->cur_ptr = ss->cur_ptr; + st->cur_len = ss->cur_len; + st->probe_packets = ss->probe_packets; + st->cur_pkt = ss->cur_pkt; + } + + // Only the containers are freed; their contents were relinked into s above. + av_free(state->stream_states); + av_free(state); +} + +/** + * Free every packet of a packet list together with its list node. + */ +static void free_packet_list(AVPacketList *pktl) +{ + AVPacketList *cur; + while (pktl) { + cur = pktl; + pktl = cur->next; + av_free_packet(&cur->pkt); + av_free(cur); + } +} + +void ff_free_parser_state(AVFormatContext *s, AVParserState *state) +{ + int i; + AVStreamState *ss; + + if (!state) + return; + + // Close the stored parsers and release the stored current packets. + for (i = 0; i < state->nb_streams; i++) { + ss = &state->stream_states[i]; + if (ss->parser) + av_parser_close(ss->parser); + av_free_packet(&ss->cur_pkt); + } + + free_packet_list(state->packet_buffer); + free_packet_list(state->raw_packet_buffer); + + av_free(state->stream_states); + av_free(state); +} + diff --git a/libavformat/seek.h b/libavformat/seek.h new file mode 100644 index
0000000000..570c2bcedf --- /dev/null +++ b/libavformat/seek.h @@ -0,0 +1,97 @@ +/* + * Utility functions for seeking for use within FFmpeg format handlers. + * + * Copyright (c) 2009 Ivan Schreter + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFORMAT_SEEK_H +#define AVFORMAT_SEEK_H + +#include "avformat.h" + +/// Opaque structure for parser state. +typedef struct AVParserState AVParserState; + +/** + * Search for sync point of all active streams. + * + * This is not supposed to be called directly by a user application, + * but by demuxers. + * + * A sync point is a point in stream, so that decoding from this point, + * output of decoders of all streams synchronizes closest to given timestamp + * ts (but taking timestamp limits into account, i.e., no sooner than min_ts + * and no later than max_ts). + * + * @param s format context. + * @param stream_index stream index for time base reference of timestamps + * (if negative, timestamps are in AV_TIME_BASE units). + * @param pos approximate position where to start searching for key frames. + * @param min_ts minimum allowed timestamp (position, if AVSEEK_FLAG_BYTE set). + * @param ts target timestamp (or position, if AVSEEK_FLAG_BYTE set in flags). + * @param max_ts maximum allowed timestamp (position, if AVSEEK_FLAG_BYTE set).
+ * @param flags if AVSEEK_FLAG_ANY is set, seek to any frame, otherwise only + * to a keyframe. If AVSEEK_FLAG_BYTE is set, search by + * position, not by timestamp. + * @return < 0 if no such sync point could be found (also if no stream is + * active or memory could not be allocated), otherwise stream position + * (stream is repositioned to this position). + */ +int64_t ff_gen_syncpoint_search(AVFormatContext *s, + int stream_index, + int64_t pos, + int64_t min_ts, + int64_t ts, + int64_t max_ts, + int flags); + +/** + * Store current parser state and file position. + * + * This function can be used by demuxers before destructive seeking algorithm + * to store parser state. After the seek, depending on outcome, original state + * can be restored or new state kept and original state freed. + * + * @note As a side effect, original parser state is reset, since structures + * are relinked to stored state instead of being deeply-copied (for + * performance reasons and to keep code simple). + * + * @param s context from which to save state. + * @return parser state object or NULL if memory could not be allocated. + */ +AVParserState *ff_store_parser_state(AVFormatContext *s); + +/** + * Restore previously saved parser state and file position. + * + * Saved state will be invalidated and freed by this call, since internal + * structures will be relinked back to stored state instead of being + * deeply-copied. + * + * @param s context to which to restore state (same as used for storing state). + * @param state state to restore; if NULL, the frame reader of s is flushed and nothing else is done. + */ +void ff_restore_parser_state(AVFormatContext *s, AVParserState *state); + +/** + * Free previously saved parser state. + * + * @param s context to which the state belongs (same as used for storing state); currently not used by the implementation. + * @param state state to free; NULL is ignored.
+ */ +void ff_free_parser_state(AVFormatContext *s, AVParserState *state); + +#endif /* AVFORMAT_SEEK_H */ diff --git a/libavformat/utils.c b/libavformat/utils.c index b9f6c004c9..4cec2863f3 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -1140,7 +1140,7 @@ int av_find_default_stream_index(AVFormatContext *s) /** * Flush the frame reader. */ -static void av_read_frame_flush(AVFormatContext *s) +void av_read_frame_flush(AVFormatContext *s) { AVStream *st; int i;