lavc: support subtitles character encoding conversion.

12 years ago · f796399344
parent 8732271e40
commit f796399344
6 changed files with 131 additions and 5 deletions
--- a/1
+++ b/1
@ -21,6 +21,7 @@ version <next>:
 - encrypted TTA stream decoding support
 - RF64 support in WAV muxer
 - noise filter ported from libmpcodecs
 - Subtitles character encoding conversion
 version 1.1:
--- a/2
+++ b/2
@ -1390,6 +1390,7 @@ HAVE_LIST="
    gnu_as
    gsm_h
    ibm_asm
    iconv
    inet_aton
    io_h
    isatty
@ -3716,6 +3717,7 @@ check_func  getopt
 check_func  getrusage
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 check_func  gettimeofday
 check_func  iconv
 check_func  inet_aton $network_extralibs
 check_func  isatty
 check_func  localtime_r
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@ -3208,6 +3208,24 @@ typedef struct AVCodecContext {
     * - encoding: unused
     */
    AVDictionary *metadata;
    /**
     * Character encoding of the input subtitles file.
     * - decoding: set by user
     * - encoding: unused
     */
    char *sub_charenc;
    /**
     * Subtitles character encoding mode. Formats or codecs might be adjusting
     * this setting (if they are doing the conversion themselves for instance).
     * - decoding: set by libavcodec
     * - encoding: unused
     */
    int sub_charenc_mode;
 #define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
 #define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
 #define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
 } AVCodecContext;
 AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@ -406,6 +406,11 @@ static const AVOption options[]={
 {"ka", "Karaoke",            0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
 {"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
 {"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
 {"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
 {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {NULL},
 };
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@ -48,6 +48,9 @@
 #include <stdarg.h>
 #include <limits.h>
 #include <float.h>
 #if HAVE_ICONV
 # include <iconv.h>
 #endif
 volatile int ff_avcodec_locked;
 static int volatile entangled_thread_counter = 0;
@ -1089,6 +1092,32 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
            ret = AVERROR(EINVAL);
            goto free_and_end;
        }
        if (avctx->sub_charenc) {
            if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
                av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
                       "supported with subtitles codecs\n");
                ret = AVERROR(EINVAL);
                goto free_and_end;
            } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
                av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
                       "subtitles character encoding will be ignored\n",
                       avctx->codec_descriptor->name);
                avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
            } else {
                /* input character encoding is set for a text based subtitle
                 * codec at this point */
                if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
                    avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
                if (!HAVE_ICONV && avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
                    av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
                           "conversion needs a libavcodec built with iconv support "
                           "for this codec\n");
                    ret = AVERROR(ENOSYS);
                    goto free_and_end;
                }
            }
        }
    }
 end:
    ff_unlock_avcodec();
@ -1847,6 +1876,68 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
    return ret;
 }
 #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
 static int recode_subtitle(AVCodecContext *avctx,
                           AVPacket *outpkt, const AVPacket *inpkt)
 {
 #if HAVE_ICONV
    iconv_t cd = (iconv_t)-1;
    int ret = 0;
    char *inb, *outb;
    size_t inl, outl;
    AVPacket tmp;
 #endif
    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER)
        return 0;
 #if HAVE_ICONV
    cd = iconv_open("UTF-8", avctx->sub_charenc);
    if (cd == (iconv_t)-1) {
        av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
               "with input character encoding \"%s\"\n", avctx->sub_charenc);
        ret = AVERROR(errno);
        goto end;
    }
    inb = inpkt->data;
    inl = inpkt->size;
    if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }
    ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
    if (ret < 0)
        goto end;
    outpkt->data = tmp.data;
    outpkt->size = tmp.size;
    outb = outpkt->data;
    outl = outpkt->size;
    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
        outl >= outpkt->size || inl != 0) {
        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
        av_free_packet(&tmp);
        ret = AVERROR(errno);
        goto end;
    }
    outpkt->size -= outl;
    outpkt->data[outpkt->size - 1] = '\0';
 end:
    if (cd != (iconv_t)-1)
        iconv_close(cd);
    return ret;
 #else
    av_assert0(!"requesting subtitles recoding without iconv");
 #endif
 }
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr,
                             AVPacket *avpkt)
@ -1862,19 +1953,28 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
    avcodec_get_subtitle_defaults(sub);
    if (avpkt->size) {
        AVPacket pkt_recoded;
        AVPacket tmp = *avpkt;
        int did_split = av_packet_split_side_data(&tmp);
        //apply_param_change(avctx, &tmp);
-        avctx->pkt = &tmp;
+        pkt_recoded = tmp;
        ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
        if (ret < 0) {
            *got_sub_ptr = 0;
        } else {
        avctx->pkt = &pkt_recoded;
        if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
            sub->pts = av_rescale_q(avpkt->pts,
                                    avctx->pkt_timebase, AV_TIME_BASE_Q);
-        ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp);
+        ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
        if (tmp.data != pkt_recoded.data)
            av_free(pkt_recoded.data);
        sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
        avctx->pkt = NULL;
        }
        if (did_split) {
            ff_packet_free_side_data(&tmp);
            if(ret == tmp.size)
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@ -29,8 +29,8 @@
 #include "libavutil/avutil.h"
 #define LIBAVCODEC_VERSION_MAJOR 54
-#define LIBAVCODEC_VERSION_MINOR 91
+#define LIBAVCODEC_VERSION_MINOR 92
-#define LIBAVCODEC_VERSION_MICRO 103
+#define LIBAVCODEC_VERSION_MICRO 100
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                               LIBAVCODEC_VERSION_MINOR, \