fftools/ffmpeg: add options for writing encoding stats

Similar to -vstats, but more flexible:
- works for audio as well as video
- frame and/or packet information
- user-specifiable format
2 years ago · 425b2c4a56
parent b95b2c8492
commit 425b2c4a56
7 changed files with 434 additions and 3 deletions
--- a/1
+++ b/1
@ -32,6 +32,7 @@ version <next>:
 - WADY DPCM decoder and demuxer
 - CBD2 DPCM decoder
 - ssim360 video filter
+- ffmpeg CLI new options: -enc_stats_pre[_fmt], -enc_stats_post[_fmt]


 version 5.1:
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@ -2047,6 +2047,94 @@ encoder/muxer, it does not change the stream to conform to this value. Setting
 values that do not match the stream properties may result in encoding failures
 or invalid output files.

+@item -enc_stats_pre[:@var{stream_specifier}] @var{path} (@emph{output,per-stream})
+@item -enc_stats_post[:@var{stream_specifier}] @var{path} (@emph{output,per-stream})
+Write per-frame encoding information about the matching streams into the file
+given by @var{path}.
+
+@option{-enc_stats_pre} writes information about raw video or audio frames right
+before they are sent for encoding, while @option{-enc_stats_post} writes
+information about encoded packets as they are received from the encoder. Every
+frame or packet produces one line in the specified file. The format of this line
+is controlled by @option{-enc_stats_pre_fmt} / @option{-enc_stats_post_fmt}.
+
+When stats for multiple streams are written into a single file, the lines
+corresponding to different streams will be interleaved. The precise order of
+this interleaving is not specified and not guaranteed to remain stable between
+different invocations of the program, even with the same options.
+
+@item -enc_stats_pre_fmt[:@var{stream_specifier}] @var{format_spec} (@emph{output,per-stream})
+@item -enc_stats_post_fmt[:@var{stream_specifier}] @var{format_spec} (@emph{output,per-stream})
+Specify the format for the lines written with @option{-enc_stats_pre} /
+@option{-enc_stats_post}.
+
+@var{format_spec} is a string that may contain directives of the form
+@var{@{fmt@}}. @var{format_spec} is backslash-escaped --- use \@{, \@}, and \\
+to write a literal @{, @}, or \, respectively, into the output.
+
+The directives given with @var{fmt} may be one of the following:
+@table @option
+@item fidx
+Index of the output file.
+
+@item sidx
+Index of the output stream in the file.
+
+@item n
+Frame number. Pre-encoding: number of frames sent to the encoder so far.
+Post-encoding: number of packets received from the encoder so far.
+
+@item tb
+Encoder timebase, as a rational number @var{num/den}. Note that this may be
+different from the timebase used by the muxer.
+
+@item pts
+Presentation timestamp of the frame or packet, as an integer. Should be
+multiplied by the timebase to compute presentation time.
+
+@item t
+Presentation time of the frame or packet, as a decimal number. Equal to
+@var{pts} multiplied by @var{tb}.
+
+@item dts
+Decoding timestamp of the packet, as an integer. Should be multiplied by the
+timebase to compute decoding time. Post-encoding only.
+
+@item dt
+Decoding time of the packet, as a decimal number. Equal to
+@var{dts} multiplied by @var{tb}. Post-encoding only.
+
+@item sn
+Number of audio samples sent to the encoder so far. Audio and pre-encoding only.
+
+@item samp
+Number of audio samples in the frame. Audio and pre-encoding only.
+
+@item size
+Size of the encoded packet in bytes. Post-encoding only.
+
+@item br
+Current bitrate in bits per second. Post-encoding only.
+
+@item abr
+Average bitrate for the whole stream so far, in bits per second, -1 if it cannot
+be determined at this point. Post-encoding only.
+@end table
+
+The default format strings are:
+@table @option
+@item pre-encoding
+@{fidx@} @{sidx@} @{n@} @{t@}
+@item post-encoding
+@{fidx@} @{sidx@} @{n@} @{t@}
+@end table
+In the future, new items may be added to the end of the default formatting
+strings. Users who depend on the format staying exactly the same should
+specify it explicitly.
+
+Note that stats for different streams written into the same file may have
+different formats.
+
@end table

@section Preset files
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@ -554,6 +554,8 @@ static void ffmpeg_cleanup(int ret)
                   av_err2str(AVERROR(errno)));
    }
    av_freep(&vstats_filename);
+    of_enc_stats_close();
+
    av_freep(&filter_nbthreads);

    av_freep(&input_files);
@ -798,6 +800,56 @@ static void update_video_stats(OutputStream *ost, const AVPacket *pkt, int write
    fprintf(vstats_file, "type= %c\n", av_get_picture_type_char(ost->pict_type));
 }

+/**
+ * Write one line of encoding statistics for a single frame or packet.
+ *
+ * The line is assembled from the pre-parsed components in es and written
+ * to es->io, followed by a newline; the output is flushed after every line.
+ *
+ * @param ost   output stream the frame/packet belongs to
+ * @param es    stats description to use (ost->enc_stats_pre or
+ *              ost->enc_stats_post); es->io must be non-NULL
+ * @param frame frame about to be encoded, or NULL when writing packet stats
+ * @param pkt   packet received from the encoder; only read when frame is NULL
+ */
+static void enc_stats_write(OutputStream *ost, EncStats *es,
+                            const AVFrame *frame, const AVPacket *pkt)
+{
+    // use the destination of the stats set being written, not unconditionally
+    // the pre-encoding one: the two may point to different files, and the
+    // pre-encoding one is NULL when only -enc_stats_post was requested
+    AVIOContext *io = es->io;
+    AVRational   tb = ost->enc_ctx->time_base;
+    int64_t     pts = frame ? frame->pts : pkt->pts;
+
+    for (size_t i = 0; i < es->nb_components; i++) {
+        const EncStatsComponent *c = &es->components[i];
+
+        // components valid for both frames and packets; 'continue' advances
+        // the enclosing for loop, anything unhandled falls through below
+        switch (c->type) {
+        case ENC_STATS_LITERAL:         avio_write (io, c->str,     c->str_len);                    continue;
+        case ENC_STATS_FILE_IDX:        avio_printf(io, "%d",       ost->file_index);               continue;
+        case ENC_STATS_STREAM_IDX:      avio_printf(io, "%d",       ost->index);                    continue;
+        case ENC_STATS_TIMEBASE:        avio_printf(io, "%d/%d",    tb.num, tb.den);                continue;
+        case ENC_STATS_PTS:             avio_printf(io, "%"PRId64,  pts);                           continue;
+        case ENC_STATS_PTS_TIME:        avio_printf(io, "%g",       pts * av_q2d(tb));              continue;
+        }
+
+        if (frame) {
+            // frame-only (pre-encoding) components
+            switch (c->type) {
+            case ENC_STATS_FRAME_NUM:   avio_printf(io, "%"PRIu64,  ost->frames_encoded);           continue;
+            case ENC_STATS_SAMPLE_NUM:  avio_printf(io, "%"PRIu64,  ost->samples_encoded);          continue;
+            case ENC_STATS_NB_SAMPLES:  avio_printf(io, "%d",       frame->nb_samples);             continue;
+            default: av_assert0(0);
+            }
+        } else {
+            // packet-only (post-encoding) components
+            switch (c->type) {
+            case ENC_STATS_DTS:         avio_printf(io, "%"PRId64,  pkt->dts);                      continue;
+            case ENC_STATS_DTS_TIME:    avio_printf(io, "%g",       pkt->dts * av_q2d(tb));         continue;
+            case ENC_STATS_PKT_SIZE:    avio_printf(io, "%d",       pkt->size);                     continue;
+            case ENC_STATS_FRAME_NUM:   avio_printf(io, "%"PRIu64,  ost->packets_encoded);          continue;
+            case ENC_STATS_BITRATE: {
+                // clamp the duration to at least one tick to avoid dividing by zero
+                double duration = FFMAX(pkt->duration, 1) * av_q2d(tb);
+                avio_printf(io, "%g",  8.0 * pkt->size / duration);
+                continue;
+            }
+            case ENC_STATS_AVG_BITRATE: {
+                // -1 is printed while no positive elapsed time is available
+                double duration = pkt->dts * av_q2d(tb);
+                avio_printf(io, "%g",  duration > 0 ? 8.0 * ost->data_size_enc / duration : -1.);
+                continue;
+            }
+            default: av_assert0(0);
+            }
+        }
+    }
+    avio_w8(io, '\n');
+    avio_flush(io);
+}
+
 static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
 {
    AVCodecContext   *enc = ost->enc_ctx;
@ -807,6 +859,9 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
    int ret;

    if (frame) {
+        if (ost->enc_stats_pre.io)
+            enc_stats_write(ost, &ost->enc_stats_pre, frame, NULL);
+
        ost->frames_encoded++;
        ost->samples_encoded += frame->nb_samples;

@ -848,6 +903,11 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
            return ret;
        }

+        if (enc->codec_type == AVMEDIA_TYPE_VIDEO)
+            update_video_stats(ost, pkt, !!vstats_filename);
+        if (ost->enc_stats_post.io)
+            enc_stats_write(ost, &ost->enc_stats_post, NULL, pkt);
+
        if (debug_ts) {
            av_log(NULL, AV_LOG_INFO, "encoder -> type:%s "
                   "pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s "
@ -872,9 +932,6 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)

        ost->data_size_enc += pkt->size;

-        if (enc->codec_type == AVMEDIA_TYPE_VIDEO)
-            update_video_stats(ost, pkt, !!vstats_filename);
-
        ost->packets_encoded++;

        of_output_packet(of, pkt, ost, 0);
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@ -252,6 +252,14 @@ typedef struct OptionsContext {
    int        nb_autoscale;
    SpecifierOpt *bits_per_raw_sample;
    int        nb_bits_per_raw_sample;
+    SpecifierOpt *enc_stats_pre;
+    int        nb_enc_stats_pre;
+    SpecifierOpt *enc_stats_post;
+    int        nb_enc_stats_post;
+    SpecifierOpt *enc_stats_pre_fmt;
+    int        nb_enc_stats_pre_fmt;
+    SpecifierOpt *enc_stats_post_fmt;
+    int        nb_enc_stats_post_fmt;
 } OptionsContext;

 typedef struct InputFilter {
@ -480,6 +488,37 @@ enum forced_keyframes_const {
 #define ABORT_ON_FLAG_EMPTY_OUTPUT        (1 <<  0)
 #define ABORT_ON_FLAG_EMPTY_OUTPUT_STREAM (1 <<  1)

+/*
+ * Kind of a single component of an encoding-stats output line.
+ * ENC_STATS_LITERAL must stay 0: the format parser treats a zero type on a
+ * directive component as "no matching directive found".
+ */
+enum EncStatsType {
+    ENC_STATS_LITERAL = 0,      /* verbatim text from the format string */
+    ENC_STATS_FILE_IDX,         /* {fidx}: output file index */
+    ENC_STATS_STREAM_IDX,       /* {sidx}: output stream index */
+    ENC_STATS_FRAME_NUM,        /* {n}: frames sent (pre) or packets received (post) so far */
+    ENC_STATS_TIMEBASE,         /* {tb}: encoder timebase as num/den */
+    ENC_STATS_PTS,              /* {pts}: frame/packet pts as an integer */
+    ENC_STATS_PTS_TIME,         /* {t}: pts multiplied by the timebase */
+    ENC_STATS_DTS,              /* {dts}: packet dts as an integer, post-encoding only */
+    ENC_STATS_DTS_TIME,         /* {dt}: dts multiplied by the timebase, post-encoding only */
+    ENC_STATS_SAMPLE_NUM,       /* {sn}: samples sent to the encoder so far, pre-encoding only */
+    ENC_STATS_NB_SAMPLES,       /* {samp}: samples in the frame, pre-encoding only */
+    ENC_STATS_PKT_SIZE,         /* {size}: packet size in bytes, post-encoding only */
+    ENC_STATS_BITRATE,          /* {br}: bitrate of the current packet, post-encoding only */
+    ENC_STATS_AVG_BITRATE,      /* {abr}: average bitrate so far or -1, post-encoding only */
+};
+
+/* One parsed element of a stats format string: literal text or a directive. */
+typedef struct EncStatsComponent {
+    enum EncStatsType type;
+
+    /* text to write and its length in bytes; only set for ENC_STATS_LITERAL */
+    uint8_t *str;
+    size_t   str_len;
+} EncStatsComponent;
+
+/* Parsed format and destination for one set (pre or post) of encoding stats. */
+typedef struct EncStats {
+    /* components written in order for every line */
+    EncStatsComponent  *components;
+    int              nb_components;
+
+    /* destination; NULL when these stats are disabled for the stream.
+     * NOTE(review): appears to be a borrowed handle that may be shared between
+     * streams and is closed by of_enc_stats_close() -- confirm */
+    AVIOContext        *io;
+} EncStats;
+
 extern const char *const forced_keyframes_const_names[];

 typedef enum {
@ -625,6 +664,9 @@ typedef struct OutputStream {

    int sq_idx_encode;
    int sq_idx_mux;
+
+    EncStats enc_stats_pre;
+    EncStats enc_stats_post;
 } OutputStream;

 typedef struct OutputFile {
@ -749,6 +791,8 @@ int of_write_trailer(OutputFile *of);
 int of_open(const OptionsContext *o, const char *filename);
 void of_close(OutputFile **pof);

+void of_enc_stats_close(void);
+
 /*
 * Send a single packet to the output, applying any bitstream filters
 * associated with the output stream.  This may result in any number
--- a/fftools/ffmpeg_mux.c
+++ b/fftools/ffmpeg_mux.c
@ -686,6 +686,14 @@ static void ost_free(OutputStream **post)
        av_freep(&ost->enc_ctx->stats_in);
    avcodec_free_context(&ost->enc_ctx);

+    for (int i = 0; i < ost->enc_stats_pre.nb_components; i++)
+        av_freep(&ost->enc_stats_pre.components[i].str);
+    av_freep(&ost->enc_stats_pre.components);
+
+    for (int i = 0; i < ost->enc_stats_post.nb_components; i++)
+        av_freep(&ost->enc_stats_post.components[i].str);
+    av_freep(&ost->enc_stats_post.components);
+
    av_freep(post);
 }

--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@ -55,6 +55,10 @@ static const char *const opt_name_copy_initial_nonkeyframes[] = {"copyinkf", NUL
 static const char *const opt_name_copy_prior_start[]          = {"copypriorss", NULL};
 static const char *const opt_name_disposition[]               = {"disposition", NULL};
 static const char *const opt_name_enc_time_bases[]            = {"enc_time_base", NULL};
+static const char *const opt_name_enc_stats_pre[]             = {"enc_stats_pre", NULL};
+static const char *const opt_name_enc_stats_post[]            = {"enc_stats_post", NULL};
+static const char *const opt_name_enc_stats_pre_fmt[]         = {"enc_stats_pre_fmt", NULL};
+static const char *const opt_name_enc_stats_post_fmt[]        = {"enc_stats_post_fmt", NULL};
 static const char *const opt_name_filters[]                   = {"filter", "af", "vf", NULL};
 static const char *const opt_name_filter_scripts[]            = {"filter_script", NULL};
 static const char *const opt_name_fps_mode[]                  = {"fps_mode", NULL};
@ -170,6 +174,201 @@ static int get_preset_file_2(const char *preset_name, const char *codec_name, AV
    return ret;
 }

+/* An open stats file, remembered by path so that several streams can share it. */
+typedef struct EncStatsFile {
+    char        *path;  // path as given on the command line, used as lookup key
+    AVIOContext *io;    // write context opened for that path
+} EncStatsFile;
+
+// registry of all stats files opened so far; released by of_enc_stats_close()
+static EncStatsFile   *enc_stats_files;
+static          int nb_enc_stats_files;
+
+/**
+ * Get the write context for a stats file, opening it on first use.
+ *
+ * Paths are compared as plain strings; a path seen before yields the
+ * already-open context so that multiple streams can write to one file.
+ *
+ * @param io   receives the context on success
+ * @param path file to write the stats to
+ * @return 0 on success, a negative error code on failure
+ */
+static int enc_stats_get_file(AVIOContext **io, const char *path)
+{
+    EncStatsFile *entry;
+    int err;
+
+    // reuse a previously opened file with the same path
+    for (int idx = 0; idx < nb_enc_stats_files; idx++) {
+        entry = &enc_stats_files[idx];
+        if (strcmp(path, entry->path))
+            continue;
+
+        *io = entry->io;
+        return 0;
+    }
+
+    // not seen yet: append a new registry entry and open the file
+    GROW_ARRAY(enc_stats_files, nb_enc_stats_files);
+    entry = &enc_stats_files[nb_enc_stats_files - 1];
+
+    err = avio_open2(&entry->io, path, AVIO_FLAG_WRITE, &int_cb, NULL);
+    if (err < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Error opening stats file '%s': %s\n",
+               path, av_err2str(err));
+        return err;
+    }
+
+    entry->path = av_strdup(path);
+    if (!entry->path)
+        return AVERROR(ENOMEM);
+
+    *io = entry->io;
+
+    return 0;
+}
+
+/**
+ * Close every stats file opened through enc_stats_get_file() and release
+ * the registry itself. Safe to call when no stats files were opened.
+ */
+void of_enc_stats_close(void)
+{
+    int idx = 0;
+
+    while (idx < nb_enc_stats_files) {
+        EncStatsFile *entry = &enc_stats_files[idx++];
+
+        av_freep(&entry->path);
+        avio_closep(&entry->io);
+    }
+
+    nb_enc_stats_files = 0;
+    av_freep(&enc_stats_files);
+}
+
+/**
+ * Copy the leading part of a backslash-escaped string, up to (not including)
+ * the first unescaped delimiter or the end of the string.
+ *
+ * A backslash followed by any character yields that character literally;
+ * a lone trailing backslash is copied as is.
+ *
+ * @param pdst    receives a newly allocated, NUL-terminated copy, or NULL when
+ *                nothing was copied; the caller frees it
+ * @param dst_len receives the length of *pdst; only written when *pdst is set
+ * @param pstr    input position; advanced to the delimiter / terminator when
+ *                something was copied, left untouched otherwise
+ * @param delim   unescaped character that ends the sequence
+ * @return 0 on success (including the nothing-copied case),
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int unescape(char **pdst, size_t *dst_len,
+                    const char **pstr, char delim)
+{
+    const char *in      = *pstr;
+    size_t      out_len = 0;
+    char       *out;
+
+    *pdst = NULL;
+
+    if (!strlen(in))
+        return 0;
+
+    // the output can never be longer than the input
+    out = av_malloc(strlen(in) + 1);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    while (*in) {
+        if (in[0] == '\\' && in[1])
+            in++;               // drop the backslash, keep what follows
+        else if (*in == delim)
+            break;
+
+        out[out_len++] = *in++;
+    }
+
+    if (!out_len) {
+        av_freep(&out);
+        return 0;
+    }
+
+    out[out_len] = 0;
+
+    *pdst    = out;
+    *dst_len = out_len;
+    *pstr    = in;
+
+    return 0;
+}
+
+/**
+ * Parse a stats format string into components and attach the output file.
+ *
+ * The format consists of literal text and directives in braces; both are
+ * backslash-unescaped. Parsed components are appended to the pre- or
+ * post-encoding stats of ost, and the file at path is opened (or reused).
+ *
+ * @param ost      output stream to set the stats up for
+ * @param pre      nonzero for pre-encoding (frame) stats, zero for
+ *                 post-encoding (packet) stats
+ * @param path     file the stats are written to
+ * @param fmt_spec format string to parse
+ * @return 0 on success, a negative error code on failure (empty, unterminated,
+ *         unknown or wrongly-placed directive, allocation or open failure)
+ */
+static int enc_stats_init(OutputStream *ost, int pre,
+                          const char *path, const char *fmt_spec)
+{
+    // 1-bit flags are unsigned: whether a plain-int bit-field is signed is
+    // implementation-defined, and a signed 1-bit field cannot represent 1
+    static const struct {
+        enum EncStatsType  type;
+        const char        *str;
+        unsigned           pre_only:1;
+        unsigned           post_only:1;
+    } fmt_specs[] = {
+        { ENC_STATS_FILE_IDX,       "fidx"                      },
+        { ENC_STATS_STREAM_IDX,     "sidx"                      },
+        { ENC_STATS_FRAME_NUM,      "n"                         },
+        { ENC_STATS_TIMEBASE,       "tb"                        },
+        { ENC_STATS_PTS,            "pts"                       },
+        { ENC_STATS_PTS_TIME,       "t"                         },
+        { ENC_STATS_DTS,            "dts",      0, 1            },
+        { ENC_STATS_DTS_TIME,       "dt",       0, 1            },
+        { ENC_STATS_SAMPLE_NUM,     "sn",       1               },
+        { ENC_STATS_NB_SAMPLES,     "samp",     1               },
+        { ENC_STATS_PKT_SIZE,       "size",     0, 1            },
+        { ENC_STATS_BITRATE,        "br",       0, 1            },
+        { ENC_STATS_AVG_BITRATE,    "abr",      0, 1            },
+    };
+    EncStats *es = pre ? &ost->enc_stats_pre : &ost->enc_stats_post;
+    const char *next = fmt_spec;
+
+    int ret;
+
+    while (*next) {
+        EncStatsComponent *c;
+        char *val;
+        size_t val_len;
+
+        // get the sequence up until next opening brace
+        ret = unescape(&val, &val_len, &next, '{');
+        if (ret < 0)
+            return ret;
+
+        if (val) {
+            GROW_ARRAY(es->components, es->nb_components);
+
+            // the component takes ownership of val
+            c          = &es->components[es->nb_components - 1];
+            c->type    = ENC_STATS_LITERAL;
+            c->str     = val;
+            c->str_len = val_len;
+        }
+
+        if (!*next)
+            break;
+        next++;
+
+        // get the part inside braces
+        ret = unescape(&val, &val_len, &next, '}');
+        if (ret < 0)
+            return ret;
+
+        if (!val) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Empty formatting directive in: %s\n", fmt_spec);
+            return AVERROR(EINVAL);
+        }
+
+        if (!*next) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Missing closing brace in: %s\n", fmt_spec);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+        next++;
+
+        GROW_ARRAY(es->components, es->nb_components);
+        c = &es->components[es->nb_components - 1];
+
+        for (size_t i = 0; i < FF_ARRAY_ELEMS(fmt_specs); i++) {
+            if (!strcmp(val, fmt_specs[i].str)) {
+                if ((pre && fmt_specs[i].post_only) || (!pre && fmt_specs[i].pre_only)) {
+                    av_log(NULL, AV_LOG_ERROR,
+                           "Format directive '%s' may only be used %s-encoding\n",
+                           val, pre ? "post" : "pre");
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                c->type = fmt_specs[i].type;
+                break;
+            }
+        }
+
+        // type still 0 (ENC_STATS_LITERAL) means no directive matched;
+        // NOTE(review): relies on GROW_ARRAY zeroing the new element -- confirm
+        if (!c->type) {
+            av_log(NULL, AV_LOG_ERROR, "Invalid format directive: %s\n", val);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        // also reached on success: the directive name is no longer needed
+fail:
+        av_freep(&val);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = enc_stats_get_file(&es->io, path);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
 static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
                                       enum AVMediaType type, InputStream *ist)
 {
@ -230,6 +429,7 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
        AVCodecContext *enc = ost->enc_ctx;
        AVIOContext *s = NULL;
        char *buf = NULL, *arg = NULL, *preset = NULL;
+        const char *enc_stats_pre = NULL, *enc_stats_post = NULL;

        ost->encoder_opts = filter_codec_opts(o->g->codec_opts, enc->codec_id,
                                              oc, st, enc->codec);
@ -261,6 +461,30 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
                   preset, ost->file_index, ost->index);
            exit_program(1);
        }
+
+        MATCH_PER_STREAM_OPT(enc_stats_pre, str, enc_stats_pre, oc, st);
+        if (enc_stats_pre &&
+            (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO)) {
+            const char *format = "{fidx} {sidx} {n} {t}";
+
+            MATCH_PER_STREAM_OPT(enc_stats_pre_fmt, str, format, oc, st);
+
+            ret = enc_stats_init(ost, 1, enc_stats_pre, format);
+            if (ret < 0)
+                exit_program(1);
+        }
+
+        MATCH_PER_STREAM_OPT(enc_stats_post, str, enc_stats_post, oc, st);
+        if (enc_stats_post &&
+            (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO)) {
+            const char *format = "{fidx} {sidx} {n} {t}";
+
+            MATCH_PER_STREAM_OPT(enc_stats_post_fmt, str, format, oc, st);
+
+            ret = enc_stats_init(ost, 0, enc_stats_post, format);
+            if (ret < 0)
+                exit_program(1);
+        }
    } else {
        ost->encoder_opts = filter_codec_opts(o->g->codec_opts, AV_CODEC_ID_NONE, oc, st, NULL);
    }
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@ -1543,6 +1543,15 @@ const OptionDef options[] = {
        { .off = OFFSET(bits_per_raw_sample) },
        "set the number of bits per raw sample", "number" },

+    { "enc_stats_pre",      HAS_ARG | OPT_SPEC | OPT_EXPERT | OPT_OUTPUT | OPT_STRING, { .off = OFFSET(enc_stats_pre)      },
+        "write encoding stats before encoding" },
+    { "enc_stats_post",     HAS_ARG | OPT_SPEC | OPT_EXPERT | OPT_OUTPUT | OPT_STRING, { .off = OFFSET(enc_stats_post)     },
+        "write encoding stats after encoding" },
+    { "enc_stats_pre_fmt",  HAS_ARG | OPT_SPEC | OPT_EXPERT | OPT_OUTPUT | OPT_STRING, { .off = OFFSET(enc_stats_pre_fmt)  },
+        "format of the stats written with -enc_stats_pre" },
+    { "enc_stats_post_fmt", HAS_ARG | OPT_SPEC | OPT_EXPERT | OPT_OUTPUT | OPT_STRING, { .off = OFFSET(enc_stats_post_fmt) },
+        "format of the stats written with -enc_stats_post" },
+
    /* video options */
    { "vframes",      OPT_VIDEO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_video_frames },
        "set the number of video frames to output", "number" },