Merge commit '98c97994c5b90bdae02accb155eeceeb5224b8ef'

* commit '98c97994c5b90bdae02accb155eeceeb5224b8ef':
  h264: decouple extradata parsing from the decoder

Main changes:

- move get_avc_nalsize() inside h264_parser.c and make it use
  H264ParseContext instead of H264Context. This helps fix
  fate-flv-demux.

- also use is_avc/nal_length_size from the H264ParseContext in various
  places instead of the H264Context ones, as those are the fields now
  filled by ff_h264_decode_extradata()

- h264_parse: don't fail decode_extradata_ps() due to nal split failure.
  Change by Michael to fix decoding of h264/ref_10.avi.

Merged-by: Clément Bœsch <u@pkh.me>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
9 years ago · 0bf5fd2e19
parent 8a135a55b3 98c97994c5
commit 0bf5fd2e19
5 changed files with 198 additions and 191 deletions
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -300,120 +300,6 @@ fail:
    return AVERROR(ENOMEM); // ff_h264_free_tables will clean up for us
 }

-static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
-                            int parse_extradata);
-
-/* There are (invalid) samples in the wild with mp4-style extradata, where the
- * parameter sets are stored unescaped (i.e. as RBSP).
- * This function catches the parameter set decoding failure and tries again
- * after escaping it */
-static int decode_extradata_ps_mp4(H264Context *h, const uint8_t *buf, int buf_size)
-{
-    int ret;
-
-    ret = decode_nal_units(h, buf, buf_size, 1);
-    if (ret < 0 && !(h->avctx->err_recognition & AV_EF_EXPLODE)) {
-        GetByteContext gbc;
-        PutByteContext pbc;
-        uint8_t *escaped_buf;
-        int escaped_buf_size;
-
-        av_log(h->avctx, AV_LOG_WARNING,
-               "SPS decoding failure, trying again after escaping the NAL\n");
-
-        if (buf_size / 2 >= (INT16_MAX - AV_INPUT_BUFFER_PADDING_SIZE) / 3)
-            return AVERROR(ERANGE);
-        escaped_buf_size = buf_size * 3 / 2 + AV_INPUT_BUFFER_PADDING_SIZE;
-        escaped_buf = av_mallocz(escaped_buf_size);
-        if (!escaped_buf)
-            return AVERROR(ENOMEM);
-
-        bytestream2_init(&gbc, buf, buf_size);
-        bytestream2_init_writer(&pbc, escaped_buf, escaped_buf_size);
-
-        while (bytestream2_get_bytes_left(&gbc)) {
-            if (bytestream2_get_bytes_left(&gbc) >= 3 &&
-                bytestream2_peek_be24(&gbc) <= 3) {
-                bytestream2_put_be24(&pbc, 3);
-                bytestream2_skip(&gbc, 2);
-            } else
-                bytestream2_put_byte(&pbc, bytestream2_get_byte(&gbc));
-        }
-
-        escaped_buf_size = bytestream2_tell_p(&pbc);
-        AV_WB16(escaped_buf, escaped_buf_size - 2);
-
-        ret = decode_nal_units(h, escaped_buf, escaped_buf_size, 1);
-        av_freep(&escaped_buf);
-        if (ret < 0)
-            return ret;
-    }
-
-    return 0;
-}
-
-int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
-{
-    AVCodecContext *avctx = h->avctx;
-    int ret;
-
-    if (!buf || size <= 0)
-        return -1;
-
-    if (buf[0] == 1) {
-        int i, cnt, nalsize;
-        const unsigned char *p = buf;
-
-        h->is_avc = 1;
-
-        if (size < 7) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "avcC %d too short\n", size);
-            return AVERROR_INVALIDDATA;
-        }
-        /* sps and pps in the avcC always have length coded with 2 bytes,
-         * so put a fake nal_length_size = 2 while parsing them */
-        h->nal_length_size = 2;
-        // Decode sps from avcC
-        cnt = *(p + 5) & 0x1f; // Number of sps
-        p  += 6;
-        for (i = 0; i < cnt; i++) {
-            nalsize = AV_RB16(p) + 2;
-            if(nalsize > size - (p-buf))
-                return AVERROR_INVALIDDATA;
-            ret = decode_extradata_ps_mp4(h, p, nalsize);
-            if (ret < 0) {
-                av_log(avctx, AV_LOG_ERROR,
-                       "Decoding sps %d from avcC failed\n", i);
-                return ret;
-            }
-            p += nalsize;
-        }
-        // Decode pps from avcC
-        cnt = *(p++); // Number of pps
-        for (i = 0; i < cnt; i++) {
-            nalsize = AV_RB16(p) + 2;
-            if(nalsize > size - (p-buf))
-                return AVERROR_INVALIDDATA;
-            ret = decode_extradata_ps_mp4(h, p, nalsize);
-            if (ret < 0) {
-                av_log(avctx, AV_LOG_ERROR,
-                       "Decoding pps %d from avcC failed\n", i);
-                return ret;
-            }
-            p += nalsize;
-        }
-        // Store right nal length size that will be used to parse all other nals
-        h->nal_length_size = (buf[4] & 0x03) + 1;
-    } else {
-        h->is_avc = 0;
-        ret = decode_nal_units(h, buf, size, 1);
-        if (ret < 0)
-            return ret;
-    }
-    return size;
-}
-
 static int h264_init_context(AVCodecContext *avctx, H264Context *h)
 {
    int i;
@@ -503,7 +389,9 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
    }

    if (avctx->extradata_size > 0 && avctx->extradata) {
-        ret = ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
+        ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
+                                       &h->ps, &h->is_avc, &h->nal_length_size,
+                                       avctx->err_recognition, avctx);
        if (ret < 0) {
            h264_decode_end(avctx);
            return ret;
@@ -992,8 +880,7 @@ static void debug_green_metadata(const H264SEIGreenMetaData *gm, void *logctx)
    }
 }

-static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
-                            int parse_extradata)
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
 {
    AVCodecContext *const avctx = h->avctx;
    unsigned context_count = 0;
@@ -1025,8 +912,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Error splitting the input into NAL units.\n");
-        /* don't consider NAL parsing failure a fatal error when parsing extradata, as the stream may work without it */
-        return parse_extradata ? buf_size : ret;
+        return ret;
    }

    if (avctx->active_thread_type & FF_THREAD_FRAME)
@@ -1044,25 +930,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
            continue;

 again:
-        /* Ignore per frame NAL unit type during extradata
-         * parsing. Decoding slices is not possible in codec init
-         * with frame-mt */
-        if (parse_extradata) {
-            switch (nal->type) {
-            case NAL_IDR_SLICE:
-            case NAL_SLICE:
-            case NAL_DPA:
-            case NAL_DPB:
-            case NAL_DPC:
-                av_log(h->avctx, AV_LOG_WARNING,
-                       "Ignoring NAL %d in global header/extradata\n",
-                       nal->type);
-                // fall through to next case
-            case NAL_AUXILIARY_SLICE:
-                nal->type = NAL_FF_IGNORE;
-            }
-        }
-
        // FIXME these should stop being context-global variables
        h->nal_ref_idc   = nal->ref_idc;
        h->nal_unit_type = nal->type;
@@ -1440,14 +1307,18 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
        int side_size;
        uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
        if (is_extra(side, side_size))
-            ff_h264_decode_extradata(h, side, side_size);
+            ff_h264_decode_extradata(side, side_size,
+                                     &h->ps, &h->is_avc, &h->nal_length_size,
+                                     avctx->err_recognition, avctx);
    }
    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
        if (is_extra(buf, buf_size))
-            return ff_h264_decode_extradata(h, buf, buf_size);
+            return ff_h264_decode_extradata(buf, buf_size,
+                                            &h->ps, &h->is_avc, &h->nal_length_size,
+                                            avctx->err_recognition, avctx);
    }

-    buf_index = decode_nal_units(h, buf, buf_size, 0);
+    buf_index = decode_nal_units(h, buf, buf_size);
    if (buf_index < 0)
        return AVERROR_INVALIDDATA;

--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -754,7 +754,6 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb,
 int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice);

 void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl);
-int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size);
 int ff_h264_decode_init(AVCodecContext *avctx);
 void ff_h264_decode_init_vlc(void);

@@ -1000,26 +999,6 @@ static inline int find_start_code(const uint8_t *buf, int buf_size,
    return FFMIN(buf_index, buf_size);
 }

-static inline int get_avc_nalsize(H264Context *h, const uint8_t *buf,
-                           int buf_size, int *buf_index)
-{
-    int i, nalsize = 0;
-
-    if (*buf_index >= buf_size - h->nal_length_size) {
-        // the end of the buffer is reached, refill it.
-        return AVERROR(EAGAIN);
-    }
-
-    for (i = 0; i < h->nal_length_size; i++)
-        nalsize = ((unsigned)nalsize << 8) | buf[(*buf_index)++];
-    if (nalsize <= 0 || nalsize > buf_size - *buf_index) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "AVC: nal size %d\n", nalsize);
-        return AVERROR_INVALIDDATA;
-    }
-    return nalsize;
-}
-
 int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup);

 int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src);
--- a/libavcodec/h264_parse.c
+++ b/libavcodec/h264_parse.c
@@ -16,6 +16,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "bytestream.h"
 #include "get_bits.h"
 #include "golomb.h"
 #include "h264.h"
@@ -319,3 +320,151 @@ int ff_h264_init_poc(int pic_field_poc[2], int *pic_poc,

    return 0;
 }
+
+static int decode_extradata_ps(const uint8_t *data, int size, H264ParamSets *ps,
+                               int is_avc, void *logctx)
+{
+    H2645Packet pkt = { 0 };
+    int i, ret = 0;
+
+    ret = ff_h2645_packet_split(&pkt, data, size, logctx, is_avc, 2, AV_CODEC_ID_H264);
+    if (ret < 0) {
+        ret = 0;
+        goto fail;
+    }
+
+    for (i = 0; i < pkt.nb_nals; i++) {
+        H2645NAL *nal = &pkt.nals[i];
+        switch (nal->type) {
+        case NAL_SPS:
+            ret = ff_h264_decode_seq_parameter_set(&nal->gb, logctx, ps, 0);
+            if (ret < 0)
+                goto fail;
+            break;
+        case NAL_PPS:
+            ret = ff_h264_decode_picture_parameter_set(&nal->gb, logctx, ps,
+                                                       nal->size_bits);
+            if (ret < 0)
+                goto fail;
+            break;
+        default:
+            av_log(logctx, AV_LOG_VERBOSE, "Ignoring NAL type %d in extradata\n",
+                   nal->type);
+            break;
+        }
+    }
+
+fail:
+    ff_h2645_packet_uninit(&pkt);
+    return ret;
+}
+
+/* There are (invalid) samples in the wild with mp4-style extradata, where the
+ * parameter sets are stored unescaped (i.e. as RBSP).
+ * This function catches the parameter set decoding failure and tries again
+ * after escaping it */
+static int decode_extradata_ps_mp4(const uint8_t *buf, int buf_size, H264ParamSets *ps,
+                                   int err_recognition, void *logctx)
+{
+    int ret;
+
+    ret = decode_extradata_ps(buf, buf_size, ps, 1, logctx);
+    if (ret < 0 && !(err_recognition & AV_EF_EXPLODE)) {
+        GetByteContext gbc;
+        PutByteContext pbc;
+        uint8_t *escaped_buf;
+        int escaped_buf_size;
+
+        av_log(logctx, AV_LOG_WARNING,
+               "SPS decoding failure, trying again after escaping the NAL\n");
+
+        if (buf_size / 2 >= (INT16_MAX - AV_INPUT_BUFFER_PADDING_SIZE) / 3)
+            return AVERROR(ERANGE);
+        escaped_buf_size = buf_size * 3 / 2 + AV_INPUT_BUFFER_PADDING_SIZE;
+        escaped_buf = av_mallocz(escaped_buf_size);
+        if (!escaped_buf)
+            return AVERROR(ENOMEM);
+
+        bytestream2_init(&gbc, buf, buf_size);
+        bytestream2_init_writer(&pbc, escaped_buf, escaped_buf_size);
+
+        while (bytestream2_get_bytes_left(&gbc)) {
+            if (bytestream2_get_bytes_left(&gbc) >= 3 &&
+                bytestream2_peek_be24(&gbc) <= 3) {
+                bytestream2_put_be24(&pbc, 3);
+                bytestream2_skip(&gbc, 2);
+            } else
+                bytestream2_put_byte(&pbc, bytestream2_get_byte(&gbc));
+        }
+
+        escaped_buf_size = bytestream2_tell_p(&pbc);
+        AV_WB16(escaped_buf, escaped_buf_size - 2);
+
+        ret = decode_extradata_ps(escaped_buf, escaped_buf_size, ps, 1, logctx);
+        av_freep(&escaped_buf);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+int ff_h264_decode_extradata(const uint8_t *data, int size, H264ParamSets *ps,
+                             int *is_avc, int *nal_length_size,
+                             int err_recognition, void *logctx)
+{
+    int ret;
+
+    if (!data || size <= 0)
+        return -1;
+
+    if (data[0] == 1) {
+        int i, cnt, nalsize;
+        const uint8_t *p = data;
+
+        *is_avc = 1;
+
+        if (size < 7) {
+            av_log(logctx, AV_LOG_ERROR, "avcC %d too short\n", size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Decode sps from avcC
+        cnt = *(p + 5) & 0x1f; // Number of sps
+        p  += 6;
+        for (i = 0; i < cnt; i++) {
+            nalsize = AV_RB16(p) + 2;
+            if (nalsize > size - (p - data))
+                return AVERROR_INVALIDDATA;
+            ret = decode_extradata_ps_mp4(p, nalsize, ps, err_recognition, logctx);
+            if (ret < 0) {
+                av_log(logctx, AV_LOG_ERROR,
+                       "Decoding sps %d from avcC failed\n", i);
+                return ret;
+            }
+            p += nalsize;
+        }
+        // Decode pps from avcC
+        cnt = *(p++); // Number of pps
+        for (i = 0; i < cnt; i++) {
+            nalsize = AV_RB16(p) + 2;
+            if (nalsize > size - (p - data))
+                return AVERROR_INVALIDDATA;
+            ret = decode_extradata_ps_mp4(p, nalsize, ps, err_recognition, logctx);
+            if (ret < 0) {
+                av_log(logctx, AV_LOG_ERROR,
+                       "Decoding pps %d from avcC failed\n", i);
+                return ret;
+            }
+            p += nalsize;
+        }
+        // Store right nal length size that will be used to parse all other nals
+        *nal_length_size = (data[4] & 0x03) + 1;
+    } else {
+        *is_avc = 0;
+        ret = decode_extradata_ps(data, size, ps, 0, logctx);
+        if (ret < 0)
+            return ret;
+    }
+    return size;
+}
--- a/libavcodec/h264_parse.h
+++ b/libavcodec/h264_parse.h
@@ -54,6 +54,7 @@ typedef struct H264POCContext {

 struct SPS;
 struct PPS;
+struct H264ParamSets;

 int ff_h264_pred_weight_table(GetBitContext *gb, const struct SPS *sps,
                              const int *ref_count, int slice_type_nos,
@@ -82,4 +83,8 @@ int ff_h264_init_poc(int pic_field_poc[2], int *pic_poc,
                     const struct SPS *sps, H264POCContext *poc,
                     int picture_structure, int nal_ref_idc);

+int ff_h264_decode_extradata(const uint8_t *data, int size, struct H264ParamSets *ps,
+                             int *is_avc, int *nal_length_size,
+                             int err_recognition, void *logctx);
+
 #endif /* AVCODEC_H264_PARSE_H */
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -52,6 +52,8 @@ typedef struct H264ParseContext {
    H264DSPContext h264dsp;
    H264POCContext poc;
    H264SEIContext sei;
+    int is_avc;
+    int nal_length_size;
    int got_first;
 } H264ParseContext;

@@ -64,20 +66,20 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf,
    uint32_t state;
    ParseContext *pc = &p->pc;

-    int next_avc= h->is_avc ? 0 : buf_size;
+    int next_avc = p->is_avc ? 0 : buf_size;
 //    mb_addr= pc->mb_addr - 1;
    state = pc->state;
    if (state > 13)
        state = 7;

-    if (h->is_avc && !h->nal_length_size)
+    if (p->is_avc && !p->nal_length_size)
        av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal length size invalid\n");

    for (i = 0; i < buf_size; i++) {
        if (i >= next_avc) {
            int nalsize = 0;
            i = next_avc;
-            for (j = 0; j < h->nal_length_size; j++)
+            for (j = 0; j < p->nal_length_size; j++)
                nalsize = (nalsize << 8) | buf[i++];
            if (nalsize <= 0 || nalsize > buf_size - i) {
                av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal size %d remaining %d\n", nalsize, buf_size - i);
@@ -132,14 +134,14 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf,
        }
    }
    pc->state = state;
-    if (h->is_avc)
+    if (p->is_avc)
        return next_avc;
    return END_NOT_FOUND;

 found:
    pc->state             = 7;
    pc->frame_start_found = 0;
-    if (h->is_avc)
+    if (p->is_avc)
        return next_avc;
    return i - (state & 5) - 5 * (state > 7);
 }
@@ -222,6 +224,26 @@ static int scan_mmco_reset(AVCodecParserContext *s, GetBitContext *gb)
    return 0;
 }

+static inline int get_avc_nalsize(H264ParseContext *p, const uint8_t *buf,
+                                  int buf_size, int *buf_index, void *logctx)
+{
+    int i, nalsize = 0;
+
+    if (*buf_index >= buf_size - p->nal_length_size) {
+        // the end of the buffer is reached, refill it
+        return AVERROR(EAGAIN);
+    }
+
+    for (i = 0; i < p->nal_length_size; i++)
+        nalsize = ((unsigned)nalsize << 8) | buf[(*buf_index)++];
+    if (nalsize <= 0 || nalsize > buf_size - *buf_index) {
+        av_log(logctx, AV_LOG_ERROR,
+               "AVC: nal size %d\n", nalsize);
+        return AVERROR_INVALIDDATA;
+    }
+    return nalsize;
+}
+
 /**
 * Parse NAL units of found picture and decode some basic information.
 *
@@ -258,13 +280,13 @@ static inline int parse_nal_units(AVCodecParserContext *s,
        return 0;

    buf_index     = 0;
-    next_avc      = h->is_avc ? 0 : buf_size;
+    next_avc      = p->is_avc ? 0 : buf_size;
    for (;;) {
        const SPS *sps;
        int src_length, consumed, nalsize = 0;

        if (buf_index >= next_avc) {
-            nalsize = get_avc_nalsize(h, buf, buf_size, &buf_index);
+            nalsize = get_avc_nalsize(p, buf, buf_size, &buf_index, avctx);
            if (nalsize < 0)
                break;
            next_avc = buf_index + nalsize;
@@ -547,8 +569,6 @@ static int h264_parse(AVCodecParserContext *s,
    if (!p->got_first) {
        p->got_first = 1;
        if (avctx->extradata_size) {
-            int i;
-
            h->avctx = avctx;
            // must be done like in decoder, otherwise opening the parser,
            // letting it create extradata and then closing and opening again
@@ -556,26 +576,9 @@ static int h264_parse(AVCodecParserContext *s,
            // Note that estimate_timings_from_pts does exactly this.
            if (!avctx->has_b_frames)
                h->low_delay = 1;
-            ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
-
-            for (i = 0; i < FF_ARRAY_ELEMS(p->ps.sps_list); i++) {
-                av_buffer_unref(&p->ps.sps_list[i]);
-                if (h->ps.sps_list[i]) {
-                    p->ps.sps_list[i] = av_buffer_ref(h->ps.sps_list[i]);
-                    if (!p->ps.sps_list[i])
-                        return AVERROR(ENOMEM);
-                }
-            }
-            for (i = 0; i < FF_ARRAY_ELEMS(p->ps.pps_list); i++) {
-                av_buffer_unref(&p->ps.pps_list[i]);
-                if (h->ps.pps_list[i]) {
-                    p->ps.pps_list[i] = av_buffer_ref(h->ps.pps_list[i]);
-                    if (!p->ps.pps_list[i])
-                        return AVERROR(ENOMEM);
-                }
-            }
-
-            p->ps.sps = h->ps.sps;
+            ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
+                                     &p->ps, &p->is_avc, &p->nal_length_size,
+                                     avctx->err_recognition, avctx);
        }
    }