avformat/movenc: add support for TTML muxing

Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
methods. This initial version also forgoes fragmentation support
whenever the built-in sample squashing is used, as this eases the
initial review.

Additionally, add basic tests for both muxing modes in MP4.

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
pull/365/head
Jan Ekström 4 years ago committed by Jan Ekström
parent 460beb948c
commit e41bd075dd
  1. 2
      libavformat/Makefile
  2. 3
      libavformat/isom.h
  3. 179
      libavformat/movenc.c
  4. 5
      libavformat/movenc.h
  5. 171
      libavformat/movenc_ttml.c
  6. 31
      libavformat/movenc_ttml.h
  7. 4
      tests/fate/subtitles.mak
  8. 44
      tests/ref/fate/sub-ttml-mp4-dfxp
  9. 44
      tests/ref/fate/sub-ttml-mp4-stpp

@ -337,7 +337,7 @@ OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \
qtpalette.o replaygain.o
OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vpcc.o \
movenchint.o mov_chan.o rtp.o \
movenccenc.o rawutils.o
movenccenc.o movenc_ttml.o rawutils.o
OBJS-$(CONFIG_MP2_MUXER) += rawenc.o
OBJS-$(CONFIG_MP3_DEMUXER) += mp3dec.o replaygain.o
OBJS-$(CONFIG_MP3_MUXER) += mp3enc.o rawenc.o id3v2enc.o

@ -387,4 +387,7 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
}
#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
#define MOV_MP4_TTML_TAG MKTAG('s', 't', 'p', 'p')
#endif /* AVFORMAT_ISOM_H */

@ -57,6 +57,8 @@
#include "hevc.h"
#include "rtpenc.h"
#include "mov_chan.h"
#include "movenc_ttml.h"
#include "ttmlenc.h"
#include "vpcc.h"
static const AVOption options[] = {
@ -120,6 +122,7 @@ static const AVClass mov_isobmff_muxer_class = {
};
static int get_moov_size(AVFormatContext *s);
static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt);
static int utf8len(const uint8_t *b)
{
@ -1788,7 +1791,29 @@ static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
mov_write_esds_tag(pb, track);
else if (track->par->extradata_size)
else if (track->par->codec_id == AV_CODEC_ID_TTML) {
switch (track->par->codec_tag) {
case MOV_ISMV_TTML_TAG:
// ISMV dfxp requires no extradata.
break;
case MOV_MP4_TTML_TAG:
// As specified in 14496-30, XMLSubtitleSampleEntry
// Namespace
avio_put_str(pb, "http://www.w3.org/ns/ttml");
// Empty schema_location
avio_w8(pb, 0);
// Empty auxiliary_mime_types
avio_w8(pb, 0);
break;
default:
av_log(NULL, AV_LOG_ERROR,
"Unknown codec tag '%s' utilized for TTML stream with "
"index %d (track id %d)!\n",
av_fourcc2str(track->par->codec_tag), track->st->index,
track->track_id);
return AVERROR(EINVAL);
}
} else if (track->par->extradata_size)
avio_write(pb, track->par->extradata, track->par->extradata_size);
if (track->mode == MODE_MP4 &&
@ -2662,6 +2687,14 @@ static int mov_write_nmhd_tag(AVIOContext *pb)
return 12;
}
/**
 * Write a fixed-size 'sthd' box to the given I/O context.
 *
 * The box consists of a 32-bit big-endian size, the 'sthd' fourcc and a
 * 32-bit zero word (presumably the version/flags field of a full box).
 *
 * @param pb output I/O context
 * @return the number of bytes making up the box (always 12)
 */
static int mov_write_sthd_tag(AVIOContext *pb)
{
    const int box_size = 12; /* size (4) + fourcc (4) + zero word (4) */

    avio_wb32(pb, box_size);
    ffio_wfourcc(pb, "sthd");
    avio_wb32(pb, 0);

    return box_size;
}
static int mov_write_tcmi_tag(AVIOContext *pb, MOVTrack *track)
{
int64_t pos = avio_tell(pb);
@ -2788,6 +2821,8 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
hdlr_type = "sbtl";
} else if (track->tag == MKTAG('m','p','4','s')) {
hdlr_type = "subp";
} else if (track->tag == MOV_MP4_TTML_TAG) {
hdlr_type = "subt";
} else {
hdlr_type = "text";
}
@ -2866,6 +2901,8 @@ static int mov_write_minf_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
else if (track->par->codec_type == AVMEDIA_TYPE_SUBTITLE) {
if (track->tag == MKTAG('t','e','x','t') || is_clcp_track(track)) {
mov_write_gmhd_tag(pb, track);
} else if (track->tag == MOV_MP4_TTML_TAG) {
mov_write_sthd_tag(pb);
} else {
mov_write_nmhd_tag(pb);
}
@ -5251,6 +5288,68 @@ static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
return 0;
}
/**
 * Squash all queued packets of a track into a single packet and write it.
 *
 * Only TTML tracks are handled; any other codec yields AVERROR(EINVAL).
 * The muxer's shared packet (mov->pkt) is used as the output packet and is
 * always unreferenced before returning.
 *
 * @param s     muxer context
 * @param track track whose squashed packet queue is to be written out
 * @return negative AVERROR on failure; otherwise the result of the packet
 *         generation (when writing is skipped) or of
 *         mov_write_single_packet()
 */
static int mov_write_squashed_packet(AVFormatContext *s, MOVTrack *track)
{
    MOVMuxContext *mov = s->priv_data;
    AVPacket *out_pkt  = mov->pkt;
    int queue_was_filled;
    int ret;

    if (track->st->codecpar->codec_id != AV_CODEC_ID_TTML) {
        ret = AVERROR(EINVAL);
        goto done;
    }

    /* Sample the queue state up front: generating the packet drains it. */
    queue_was_filled = track->squashed_packet_queue != NULL;

    ret = ff_mov_generate_squashed_ttml_packet(s, track, out_pkt);
    if (ret < 0)
        goto done;

    /* A zero-duration padding document produced from an empty queue
     * carries no information, so it is not written. */
    if (!queue_was_filled && out_pkt->duration == 0)
        goto done;

    track->end_reliable = 1;

    out_pkt->stream_index = track->st->index;
    ret = mov_write_single_packet(s, out_pkt);

done:
    av_packet_unref(out_pkt);
    return ret;
}
/**
 * Write the squashed packet of every track that needs one.
 *
 * A track is processed when it has squash_fragment_samples_to_one set and
 * has no entries yet (track->entry == 0).
 *
 * @param s muxer context
 * @return 0 on success, or the negative AVERROR of the first track that
 *         failed (an error is logged for it)
 */
static int mov_write_squashed_packets(AVFormatContext *s)
{
    MOVMuxContext *mov = s->priv_data;

    for (int i = 0; i < s->nb_streams; i++) {
        MOVTrack *trk = &mov->tracks[i];
        int err;

        if (!trk->squash_fragment_samples_to_one || trk->entry)
            continue;

        err = mov_write_squashed_packet(s, trk);
        if (err >= 0)
            continue;

        av_log(s, AV_LOG_ERROR,
               "Failed to write squashed packet for %s stream with "
               "index %d and track id %d. Error: %s\n",
               avcodec_get_name(trk->st->codecpar->codec_id),
               trk->st->index, trk->track_id,
               av_err2str(err));
        return err;
    }

    return 0;
}
static int mov_flush_fragment(AVFormatContext *s, int force)
{
MOVMuxContext *mov = s->priv_data;
@ -5262,6 +5361,11 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
if (!(mov->flags & FF_MOV_FLAG_FRAGMENT))
return 0;
// Check if we have any tracks that require squashing.
// In that case, we'll have to write the packet here.
if ((ret = mov_write_squashed_packets(s)) < 0)
return ret;
// Try to fill in the duration of the last packet in each stream
// from queued packets in the interleave queues. If the flushing
// of fragments was triggered automatically by an AVPacket, we
@ -5727,7 +5831,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
trk->cluster[trk->entry].entries = samples_in_chunk;
trk->cluster[trk->entry].dts = pkt->dts;
trk->cluster[trk->entry].pts = pkt->pts;
if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
if (!trk->squash_fragment_samples_to_one &&
!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
if (!trk->frag_discont) {
/* First packet of a new fragment. We already wrote the duration
* of the last packet of the previous fragment based on track_duration,
@ -6022,6 +6127,33 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
}
}
if (trk->squash_fragment_samples_to_one) {
/*
* If the track has to have its samples squashed into one sample,
* we just take it into the track's queue.
* This will then be utilized as the samples get written in either
* mov_flush_fragment or when the mux is finalized in
* mov_write_trailer.
*/
int ret = AVERROR_BUG;
if (pkt->pts == AV_NOPTS_VALUE) {
av_log(s, AV_LOG_ERROR,
"Packets without a valid presentation timestamp are "
"not supported with packet squashing!\n");
return AVERROR(EINVAL);
}
if ((ret = avpriv_packet_list_put(&trk->squashed_packet_queue,
&trk->squashed_packet_queue_end,
pkt, av_packet_ref, 0)) < 0) {
return ret;
}
return 0;
}
if (trk->mode == MODE_MOV && trk->par->codec_type == AVMEDIA_TYPE_VIDEO) {
AVPacket *opkt = pkt;
int reshuffle_ret, ret;
@ -6300,6 +6432,11 @@ static void mov_free(AVFormatContext *s)
ff_mov_cenc_free(&mov->tracks[i].cenc);
ffio_free_dyn_buf(&mov->tracks[i].mdat_buf);
if (mov->tracks[i].squashed_packet_queue) {
avpriv_packet_list_free(&(mov->tracks[i].squashed_packet_queue),
&(mov->tracks[i].squashed_packet_queue_end));
}
}
av_freep(&mov->tracks);
@ -6690,6 +6827,36 @@ static int mov_init(AVFormatContext *s)
}
} else if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
track->timescale = st->time_base.den;
if (track->par->codec_id == AV_CODEC_ID_TTML) {
/* 14496-30 requires us to use a single sample per fragment
for TTML, for which we define a per-track flag.
We set the flag in case we are receiving TTML paragraphs
from the input, in other words in case we are not doing
stream copy. */
track->squash_fragment_samples_to_one =
ff_is_ttml_stream_paragraph_based(track->par);
if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
track->squash_fragment_samples_to_one) {
av_log(s, AV_LOG_ERROR,
"Fragmentation is not currently supported for "
"TTML in MP4/ISMV (track synchronization between "
"subtitles and other media is not yet implemented)!\n");
return AVERROR_PATCHWELCOME;
}
if (track->mode != MODE_ISM &&
track->par->codec_tag == MOV_ISMV_TTML_TAG &&
s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
av_log(s, AV_LOG_ERROR,
"ISMV style TTML support with the 'dfxp' tag in "
"non-ISMV formats is not officially supported. Add "
"'-strict unofficial' if you want to use it.\n");
return AVERROR_EXPERIMENTAL;
}
}
} else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
track->timescale = st->time_base.den;
} else {
@ -7036,6 +7203,11 @@ static int mov_write_trailer(AVFormatContext *s)
}
}
// Check if we have any tracks that require squashing.
// In that case, we'll have to write the packet here.
if ((res = mov_write_squashed_packets(s)) < 0)
return res;
// If there were no chapters when the header was written, but there
// are chapters now, write them in the trailer. This only works
// when we are not doing fragments.
@ -7180,6 +7352,8 @@ static const AVCodecTag codec_mp4_tags[] = {
{ AV_CODEC_ID_MOV_TEXT, MKTAG('t', 'x', '3', 'g') },
{ AV_CODEC_ID_BIN_DATA, MKTAG('g', 'p', 'm', 'd') },
{ AV_CODEC_ID_MPEGH_3D_AUDIO, MKTAG('m', 'h', 'm', '1') },
{ AV_CODEC_ID_TTML, MOV_MP4_TTML_TAG },
{ AV_CODEC_ID_TTML, MOV_ISMV_TTML_TAG },
{ AV_CODEC_ID_NONE, 0 },
};
#if CONFIG_MP4_MUXER || CONFIG_PSP_MUXER
@ -7188,6 +7362,7 @@ static const AVCodecTag *const mp4_codec_tags_list[] = { codec_mp4_tags, NULL };
/* Codec ID -> fourcc tag pairs, terminated by an AV_CODEC_ID_NONE entry.
 * NOTE(review): judging by the name this is the tag table for ISM output;
 * the code consuming it is not visible here. */
static const AVCodecTag codec_ism_tags[] = {
{ AV_CODEC_ID_WMAPRO , MKTAG('w', 'm', 'a', ' ') },
{ AV_CODEC_ID_TTML , MOV_ISMV_TTML_TAG },
{ AV_CODEC_ID_NONE , 0 },
};

@ -26,6 +26,7 @@
#include "avformat.h"
#include "movenccenc.h"
#include "libavcodec/packet_internal.h"
#define MOV_FRAG_INFO_ALLOC_INCREMENT 64
#define MOV_INDEX_CLUSTER_SIZE 1024
@ -163,6 +164,10 @@ typedef struct MOVTrack {
int pal_done;
int is_unaligned_qt_rgb;
unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
PacketList *squashed_packet_queue, *squashed_packet_queue_end;
} MOVTrack;
typedef enum {

@ -0,0 +1,171 @@
/*
* MP4, ISMV Muxer TTML helpers
* Copyright (c) 2021 24i
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avformat.h"
#include "avio_internal.h"
#include "isom.h"
#include "movenc.h"
#include "movenc_ttml.h"
#include "libavcodec/packet_internal.h"
static const unsigned char empty_ttml_document[] =
"<tt xml:lang=\"\" xmlns=\"http://www.w3.org/ns/ttml\" />";
/**
 * Set up an in-memory "ttml" muxer instance mirroring the given track.
 *
 * Allocates an output context for the "ttml" format, attaches a dynamic
 * buffer as its I/O context and adds one stream whose codec parameters and
 * time base are copied from the track's stream.
 *
 * Nothing is released here on failure: whatever was already stored in
 * *out_ctx stays there and has to be freed by the caller.
 *
 * @param track   source track
 * @param out_ctx receives the newly allocated muxer context
 * @return 0 on success, negative AVERROR on failure
 */
static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
{
    AVStream *src_st = track->st;
    AVStream *dst_st;
    int ret;

    ret = avformat_alloc_output_context2(out_ctx, NULL, "ttml", NULL);
    if (ret < 0)
        return ret;

    ret = avio_open_dyn_buf(&(*out_ctx)->pb);
    if (ret < 0)
        return ret;

    dst_st = avformat_new_stream(*out_ctx, NULL);
    if (!dst_st)
        return AVERROR(ENOMEM);

    ret = avcodec_parameters_copy(dst_st->codecpar, src_st->codecpar);
    if (ret < 0)
        return ret;

    dst_st->time_base = src_st->time_base;

    return 0;
}
/**
 * Drain the track's squashed packet queue into one TTML document.
 *
 * Writes header, all queued packets and trailer through ttml_ctx. The
 * document's start is the track's current end (start_dts + track_duration),
 * or 0 if the track has no start_dts yet; its end is the largest
 * pts + duration seen among the queued packets (never before the start).
 *
 * @param s             muxer context (not used by this function)
 * @param ttml_ctx      initialized TTML muxer to write through
 * @param track         track owning the packet queue
 * @param pkt           scratch packet used for dequeuing; on a write failure
 *                      it is left holding the failing packet
 * @param out_start_ts  set to the document start on success only
 * @param out_duration  set to the document duration on success only
 * @return 0 on success, negative AVERROR on failure
 */
static int mov_write_ttml_document_from_queue(AVFormatContext *s,
                                              AVFormatContext *ttml_ctx,
                                              MOVTrack *track,
                                              AVPacket *pkt,
                                              int64_t *out_start_ts,
                                              int64_t *out_duration)
{
    int64_t first_ts, last_ts;
    int ret;

    if (track->start_dts == AV_NOPTS_VALUE)
        first_ts = 0;
    else
        first_ts = track->start_dts + track->track_duration;
    last_ts = first_ts;

    ret = avformat_write_header(ttml_ctx, NULL);
    if (ret < 0)
        return ret;

    for (;;) {
        int64_t pkt_end;

        if (avpriv_packet_list_get(&track->squashed_packet_queue,
                                   &track->squashed_packet_queue_end,
                                   pkt))
            break;

        pkt_end = pkt->pts + pkt->duration;
        if (pkt_end > last_ts)
            last_ts = pkt_end;

        /* With the 'dfxp' tag, timestamps inside the document are made
         * relative to the beginning of the containing sample. */
        if (track->par->codec_tag == MOV_ISMV_TTML_TAG) {
            pkt->pts -= first_ts;
            pkt->dts  = pkt->pts;
        }

        pkt->stream_index = 0;
        av_packet_rescale_ts(pkt, track->st->time_base,
                             ttml_ctx->streams[pkt->stream_index]->time_base);

        ret = av_write_frame(ttml_ctx, pkt);
        if (ret < 0)
            return ret;

        av_packet_unref(pkt);
    }

    ret = av_write_trailer(ttml_ctx);
    if (ret < 0)
        return ret;

    *out_start_ts = first_ts;
    *out_duration = last_ts - first_ts;

    return 0;
}
/**
 * Build a single packet holding one TTML document for the given track.
 *
 * If the track's squashed packet queue holds packets, they are all written
 * into the document and the packet's timing is derived from them. With an
 * empty queue a minimal empty document is produced, with pts/dts 0 and a
 * duration of 0.
 *
 * The temporary TTML muxer is always freed before returning.
 *
 * @param s     muxer context, used for logging
 * @param track track whose queue is consumed
 * @param pkt   receives the generated key-frame packet; it is also used as
 *              scratch space while the queue is drained
 * @return 0 on success, negative AVERROR on failure
 */
int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
                                         MOVTrack *track, AVPacket *pkt)
{
    AVFormatContext *ttml_ctx = NULL;
    /* timing of the generated packet; stays 0/0 for the empty document */
    int64_t start_ts = 0, duration = 0;
    uint8_t *doc = NULL;
    int doc_size;
    int ret;

    ret = mov_init_ttml_writer(track, &ttml_ctx);
    if (ret < 0) {
        av_log(s, AV_LOG_ERROR, "Failed to initialize the TTML writer: %s\n",
               av_err2str(ret));
        goto cleanup;
    }

    if (track->squashed_packet_queue) {
        ret = mov_write_ttml_document_from_queue(s, ttml_ctx, track, pkt,
                                                 &start_ts, &duration);
        if (ret < 0) {
            av_log(s, AV_LOG_ERROR,
                   "Failed to generate a squashed TTML packet from the packet "
                   "queue: %s\n",
                   av_err2str(ret));
            goto cleanup;
        }
    } else {
        /* Nothing queued: emit the minimal empty document (without its
         * terminating NUL byte). */
        avio_write(ttml_ctx->pb, empty_ttml_document,
                   sizeof(empty_ttml_document) - 1);
    }

    /* Take the document out of the dynamic buffer; clearing pb keeps the
     * cleanup code below from touching the closed I/O context. */
    doc_size = avio_close_dyn_buf(ttml_ctx->pb, &doc);
    ttml_ctx->pb = NULL;

    ret = av_packet_from_data(pkt, doc, doc_size);
    if (ret < 0) {
        av_log(s, AV_LOG_ERROR,
               "Failed to create a TTML AVPacket from AVIO data: %s\n",
               av_err2str(ret));
        av_freep(&doc);
        goto cleanup;
    }

    pkt->dts       = start_ts;
    pkt->pts       = start_ts;
    pkt->duration  = duration;
    pkt->flags    |= AV_PKT_FLAG_KEY;

    ret = 0;

cleanup:
    if (ttml_ctx)
        ffio_free_dyn_buf(&ttml_ctx->pb);
    avformat_free_context(ttml_ctx);
    return ret;
}

@ -0,0 +1,31 @@
/*
* MP4, ISMV Muxer TTML helpers
* Copyright (c) 2021 24i
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFORMAT_MOVENC_TTML_H
#define AVFORMAT_MOVENC_TTML_H
#include "avformat.h"
#include "movenc.h"
/**
 * Generate a single packet containing one TTML document built from the
 * packets currently held in the track's squashed packet queue.
 *
 * If the queue is empty, a minimal empty TTML document with a timestamp
 * and duration of zero is generated instead.
 *
 * @param s     muxer context, used for logging
 * @param track track whose squashed packet queue is consumed
 * @param pkt   packet that receives the generated document; it is also
 *              used as temporary storage while the queue is drained
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
MOVTrack *track, AVPacket *pkt);
#endif /* AVFORMAT_MOVENC_TTML_H */

@ -109,6 +109,10 @@ fate-sub-dvb: CMD = framecrc -i $(TARGET_SAMPLES)/sub/dvbsubtest_filter.ts -map
FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL SRT_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER) += fate-sub-ttmlenc
fate-sub-ttmlenc: CMD = fmtstdout ttml -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL SRT_DEMUXER MOV_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER MOV_MUXER) += fate-sub-ttml-mp4-stpp fate-sub-ttml-mp4-dfxp
fate-sub-ttml-mp4-stpp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
fate-sub-ttml-mp4-dfxp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000 -tag:s dfxp -strict unofficial" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
FATE_SUBTITLES-$(call ENCMUX, ASS, ASS) += $(FATE_SUBTITLES_ASS-yes)
FATE_SUBTITLES += $(FATE_SUBTITLES-yes)

@ -0,0 +1,44 @@
2e7e01c821c111466e7a2844826b7f6d *tests/data/fate/sub-ttml-mp4-dfxp.mp4
8519 tests/data/fate/sub-ttml-mp4-dfxp.mp4
#tb 0: 1/1000
#media_type 0: data
#codec_id 0: none
0, 0, 0, 68500, 7866, 0x456c36b7
{
"packets": [
{
"codec_type": "data",
"stream_index": 0,
"pts": 0,
"pts_time": "0.000000",
"dts": 0,
"dts_time": "0.000000",
"duration": 68500,
"duration_time": "68.500000",
"size": "7866",
"pos": "44",
"flags": "K_"
}
],
"programs": [
],
"streams": [
{
"index": 0,
"codec_type": "data",
"codec_tag_string": "dfxp",
"codec_tag": "0x70786664",
"time_base": "1/1000",
"start_time": "0.000000",
"duration_ts": 68500,
"duration": "68.500000",
"nb_frames": "1",
"nb_read_packets": "1",
"tags": {
"language": "und",
"handler_name": "SubtitleHandler"
}
}
]
}

@ -0,0 +1,44 @@
cbd2c7ff864a663b0d893deac5a0caec *tests/data/fate/sub-ttml-mp4-stpp.mp4
8547 tests/data/fate/sub-ttml-mp4-stpp.mp4
#tb 0: 1/1000
#media_type 0: data
#codec_id 0: none
0, 0, 0, 68500, 7866, 0x456c36b7
{
"packets": [
{
"codec_type": "data",
"stream_index": 0,
"pts": 0,
"pts_time": "0.000000",
"dts": 0,
"dts_time": "0.000000",
"duration": 68500,
"duration_time": "68.500000",
"size": "7866",
"pos": "44",
"flags": "K_"
}
],
"programs": [
],
"streams": [
{
"index": 0,
"codec_type": "data",
"codec_tag_string": "stpp",
"codec_tag": "0x70707473",
"time_base": "1/1000",
"start_time": "0.000000",
"duration_ts": 68500,
"duration": "68.500000",
"nb_frames": "1",
"nb_read_packets": "1",
"tags": {
"language": "und",
"handler_name": "SubtitleHandler"
}
}
]
}
Loading…
Cancel
Save