Merge pull request #15290 from cudawarped:ffmpeg_raw_retrieve

Add retrieve encoded frame to VideoCapture

* Add the ability to retrieve the encoded frame from a VideoCapture object.

* Correct raw codec and pixel format output from FFmpeg capture.

* Remove warnings from build.

* Added VideoCaptureRaw subclass.

* Include abstract base class VideoCaptureBase and rename the new subclass to VideoContainer, as suggested by mshabunin.

* Remove using.

* Change base class name for compatibility with the Java bindings generator.

* Move grab and retrieve, and add the override specifier.

* Add setRaw and readRaw to the IVideoCapture interface
- setRaw disables video decoding and enables the bitstream filters from mp4 to h264 and h265.
- readRaw returns the raw, undecoded/filtered bitstream.
Add createRawCapture to initiate a backend with setRaw enabled.
Remove inheritance and use an independent VideoContainer subclass with an IVideoCapture member.

* Address unused parameter warnings.
Remove VideoContainer from the Python bindings as it no longer returns a Mat.
Use the OpenCV type uchar instead of unsigned char.
Add the missing destructor to the VideoContainer class.

* Address build warnings and include all params in documentation.

* Include deprecated bitstream filtering API.

* Update the codec_id query to work with older FFmpeg APIs.
Change the API version defines to be consistent: most recent API version first.

* Fix typo.

* Update test to work with naming of new files in the extra repo

* Investigate test failure

* Check bytes read by ffmpeg

* Removed mp4 video container test

* Applied suggested changes.

* videoio: rework API for extraction of RAW video streams

- FFmpeg only

* address review comments
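
For reference, a minimal usage sketch of the reworked API as exercised by the new test: decoding is switched off with CAP_PROP_FORMAT = -1, after which each read() yields one undecoded packet as a 1xN CV_8UC1 Mat. The input and output file names here are illustrative.

```cpp
#include <fstream>
#include <opencv2/videoio.hpp>

int main()
{
    // Open the container with the FFmpeg backend and request RAW mode
    // before grabbing any frames ("video.h264" is an illustrative input).
    cv::VideoCapture cap("video.h264", cv::CAP_FFMPEG);
    if (!cap.isOpened())
        return 1;
    if (!cap.set(cv::CAP_PROP_FORMAT, -1))   // RAW extraction not supported
        return 1;

    std::ofstream out("stream.bin", std::ios::out | std::ios::binary);
    cv::Mat raw;                              // 1 x packetSize, CV_8UC1
    while (cap.read(raw))
        out.write(reinterpret_cast<const char*>(raw.data), (std::streamsize)raw.total());
    return 0;
}
```
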
pull/15753/head
Authored by cudawarped 5 years ago, committed by Alexander Alekhin
parent af230ec133
commit 0867e3188d
  1. modules/videoio/include/opencv2/videoio.hpp (4)
  2. modules/videoio/src/cap_ffmpeg_impl.hpp (191)
  3. modules/videoio/test/test_ffmpeg.cpp (84)

@@ -137,7 +137,8 @@ enum VideoCaptureProperties {
CAP_PROP_FPS =5, //!< Frame rate.
CAP_PROP_FOURCC =6, //!< 4-character code of codec. see VideoWriter::fourcc .
CAP_PROP_FRAME_COUNT =7, //!< Number of frames in the video file.
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects returned by VideoCapture::retrieve().
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
//!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
CAP_PROP_MODE =9, //!< Backend-specific value indicating the current capture mode.
CAP_PROP_BRIGHTNESS =10, //!< Brightness of the image (only for those cameras that support).
CAP_PROP_CONTRAST =11, //!< Contrast of the image (only for cameras).
@@ -174,6 +175,7 @@ enum VideoCaptureProperties {
CAP_PROP_CHANNEL =43, //!< Video input or Channel Number (only for those cameras that support)
CAP_PROP_AUTO_WB =44, //!< enable/ disable auto white-balance
CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
CAP_PROP_CODEC_PIXEL_FORMAT =46, //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
#ifndef CV_DOXYGEN
CV__CAP_PROP_LATEST
#endif
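
The new CAP_PROP_CODEC_PIXEL_FORMAT property is returned as a FOURCC value (or -1 when unknown); a small caller-side sketch for unpacking it into text (the helper and function names are illustrative, the test further below uses its own fourccToString):

```cpp
#include <iostream>
#include <string>
#include <opencv2/videoio.hpp>

// Illustrative helper: unpack a FOURCC integer into its 4-character form.
static std::string fourccToText(int fourcc)
{
    return std::string{ char(fourcc & 255), char((fourcc >> 8) & 255),
                        char((fourcc >> 16) & 255), char((fourcc >> 24) & 255) };
}

static void printCodecInfo(cv::VideoCapture& cap)  // cap: an opened FFmpeg-backed capture
{
    int codec  = (int)cap.get(cv::CAP_PROP_FOURCC);
    int pixFmt = (int)cap.get(cv::CAP_PROP_CODEC_PIXEL_FORMAT);
    std::cout << "codec: " << fourccToText(codec) << std::endl;
    if (pixFmt != -1)
        std::cout << "pixel format: " << fourccToText(pixFmt) << std::endl;  // e.g. "I420"
}
```
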

@@ -531,6 +531,17 @@ struct CvCapture_FFMPEG
#if USE_AV_INTERRUPT_CALLBACK
AVInterruptCallbackMetadata interrupt_metadata;
#endif
bool setRaw();
bool processRawPacket();
bool rawMode;
bool rawModeInitialized;
AVPacket packet_filtered;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVBSFContext* bsfc;
#else
AVBitStreamFilterContext* bsfc;
#endif
};
void CvCapture_FFMPEG::init()
@@ -555,6 +566,12 @@ void CvCapture_FFMPEG::init()
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
dict = NULL;
#endif
rawMode = false;
rawModeInitialized = false;
memset(&packet_filtered, 0, sizeof(packet_filtered));
av_init_packet(&packet_filtered);
bsfc = NULL;
}
@@ -623,6 +640,21 @@ void CvCapture_FFMPEG::close()
av_dict_free(&dict);
#endif
if (packet_filtered.data)
{
_opencv_ffmpeg_av_packet_unref(&packet_filtered);
packet_filtered.data = NULL;
}
if (bsfc)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
av_bsf_free(&bsfc);
#else
av_bitstream_filter_close(bsfc);
#endif
}
init();
}
@@ -997,6 +1029,116 @@ exit_func:
return valid;
}
bool CvCapture_FFMPEG::setRaw()
{
if (!rawMode)
{
if (frame_number != 0)
{
CV_WARN("Incorrect usage: do not grab frames before .set(CAP_PROP_FORMAT, -1)");
}
// binary stream filter creation is moved into processRawPacket()
rawMode = true;
}
return true;
}
bool CvCapture_FFMPEG::processRawPacket()
{
if (packet.data == NULL) // EOF
return false;
if (!rawModeInitialized)
{
rawModeInitialized = true;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
#elif LIBAVFORMAT_BUILD > 4628
AVCodecID eVideoCodec = video_st->codec->codec_id;
#else
AVCodecID eVideoCodec = video_st->codec.codec_id;
#endif
const char* filterName = NULL;
if (eVideoCodec == CV_CODEC(CODEC_ID_H264) || eVideoCodec == CV_CODEC(CODEC_ID_H265))
{
// check start code prefixed mode (as defined in the Annex B H.264 / H.265 specification)
if (packet.size >= 5
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 0 && packet.data[3] == 1)
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 1)
)
{
filterName = eVideoCodec == CV_CODEC(CODEC_ID_H264) ? "h264_mp4toannexb" : "hevc_mp4toannexb";
}
}
if (filterName)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
const AVBitStreamFilter * bsf = av_bsf_get_by_name(filterName);
if (!bsf)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
int err = av_bsf_alloc(bsf, &bsfc);
if (err < 0)
{
CV_WARN("Error allocating context for bitstream buffer");
return false;
}
avcodec_parameters_copy(bsfc->par_in, ic->streams[video_stream]->codecpar);
err = av_bsf_init(bsfc);
if (err < 0)
{
CV_WARN("Error initializing bitstream buffer");
return false;
}
#else
bsfc = av_bitstream_filter_init(filterName);
if (!bsfc)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
#endif
}
}
if (bsfc)
{
if (packet_filtered.data)
{
av_packet_unref(&packet_filtered);
}
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
int err = av_bsf_send_packet(bsfc, &packet);
if (err < 0)
{
CV_WARN("Packet submission for filtering failed");
return false;
}
err = av_bsf_receive_packet(bsfc, &packet_filtered);
if (err < 0)
{
CV_WARN("Filtered packet retrieve failed");
return false;
}
#else
#if LIBAVFORMAT_BUILD > 4628
AVCodecContext* ctx = ic->streams[video_stream]->codec;
#else
AVCodecContext* ctx = &ic->streams[video_stream]->codec;
#endif
int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
&packet_filtered.size, packet.data, packet.size, packet_filtered.flags & AV_PKT_FLAG_KEY);
if (err < 0)
{
CV_WARN("Packet filtering failed");
return false;
}
#endif
return packet_filtered.data != NULL;
}
return packet.data != NULL;
}
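
processRawPacket() only wires in the h264_mp4toannexb / hevc_mp4toannexb filter when the first packet is not already start-code prefixed; a standalone sketch of that Annex B check, mirroring the condition above (the implementation additionally requires at least 5 bytes before testing):

```cpp
#include <cstddef>
#include <cstdint>

// Returns true if the buffer already begins with an Annex B start code
// (00 00 01 or 00 00 00 01), in which case no mp4-to-annexb filtering is needed.
static bool isAnnexB(const uint8_t* data, size_t size)
{
    if (size >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
        return true;
    if (size >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1)
        return true;
    return false;
}
```
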
bool CvCapture_FFMPEG::grabFrame()
{
@@ -1048,6 +1190,12 @@ bool CvCapture_FFMPEG::grabFrame()
continue;
}
if (rawMode)
{
valid = processRawPacket();
break;
}
// Decode video frame
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
@@ -1068,7 +1216,6 @@ bool CvCapture_FFMPEG::grabFrame()
if( picture_pts == AV_NOPTS_VALUE_ )
picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts;
frame_number++;
valid = true;
}
else
@@ -1079,7 +1226,10 @@ bool CvCapture_FFMPEG::grabFrame()
}
}
if( valid && first_frame_number < 0 )
if (valid)
frame_number++;
if (!rawMode && valid && first_frame_number < 0)
first_frame_number = dts_to_frame_number(picture_pts);
#if USE_AV_INTERRUPT_CALLBACK
@@ -1087,14 +1237,28 @@ bool CvCapture_FFMPEG::grabFrame()
interrupt_metadata.timeout_after_ms = 0;
#endif
// return if we have a new picture or not
// return if we have a new frame or not
return valid;
}
bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* width, int* height, int* cn)
{
if( !video_st || !picture->data[0] )
if (!video_st)
return false;
if (rawMode)
{
AVPacket& p = bsfc ? packet_filtered : packet;
*data = p.data;
*step = p.size;
*width = p.size;
*height = 1;
*cn = 1;
return p.data != NULL;
}
if (!picture->data[0])
return false;
if( img_convert_ctx == NULL ||
@@ -1163,7 +1327,6 @@ bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int*
return true;
}
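
Since raw mode hands each (optionally Annex B filtered) packet back as a 1xN CV_8UC1 Mat, a consumer can scan it directly; a hedged sketch that lists H.264 NAL unit types from such a buffer (the 0x1F mask follows the H.264 NAL header layout; H.265 headers differ):

```cpp
#include <cstdio>
#include <opencv2/core.hpp>

// Walk an Annex B H.264 buffer (as returned in raw mode) and print the
// nal_unit_type found after each 00 00 01 start code.
static void listNalUnits(const cv::Mat& raw)   // raw: 1 x N, CV_8UC1
{
    const uchar* p = raw.ptr<uchar>();
    const size_t n = raw.total();
    for (size_t i = 0; i + 3 < n; i++)
    {
        if (p[i] == 0 && p[i + 1] == 0 && p[i + 2] == 1)
        {
            std::printf("NAL unit type: %d\n", p[i + 3] & 0x1F);  // low 5 bits of the NAL header
            i += 3;  // skip past the start code
        }
    }
}
```
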
double CvCapture_FFMPEG::getProperty( int property_id ) const
{
if( !video_st ) return 0;
@@ -1217,6 +1380,20 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num;
case CAP_PROP_SAR_DEN:
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den;
case CAP_PROP_CODEC_PIXEL_FORMAT:
{
#if LIBAVFORMAT_BUILD > 4628
AVPixelFormat pix_fmt = video_st->codec->pix_fmt;
#else
AVPixelFormat pix_fmt = video_st->codec.pix_fmt;
#endif
unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt);
return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag;
}
case CAP_PROP_FORMAT:
if (rawMode)
return -1;
break;
default:
break;
}
@@ -1386,6 +1563,10 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value )
picture_pts=(int64_t)value;
}
break;
case CAP_PROP_FORMAT:
if (value == -1)
return setRaw();
return false;
default:
return false;
}

@@ -95,6 +95,90 @@ TEST(videoio_ffmpeg, image)
//==========================================================================
typedef tuple<VideoCaptureAPIs, string, string, string, string, string> videoio_container_params_t;
typedef testing::TestWithParam< videoio_container_params_t > videoio_container;
TEST_P(videoio_container, read)
{
const VideoCaptureAPIs api = get<0>(GetParam());
if (!videoio_registry::hasBackend(api))
throw SkipTestException("Backend was not found");
const string path = get<1>(GetParam());
const string ext = get<2>(GetParam());
const string ext_raw = get<3>(GetParam());
const string codec = get<4>(GetParam());
const string pixelFormat = get<5>(GetParam());
const string fileName = path + "." + ext;
const string fileNameOut = tempfile(cv::format("test_container_stream.%s", ext_raw.c_str()).c_str());
// Write encoded video read using VideoContainer to tmp file
{
VideoCapture container(findDataFile(fileName), api);
ASSERT_TRUE(container.isOpened());
if (!container.set(CAP_PROP_FORMAT, -1)) // turn off video decoder (extract stream)
throw SkipTestException("Fetching of RAW video streams is not supported");
ASSERT_EQ(-1.f, container.get(CAP_PROP_FORMAT)); // check
EXPECT_EQ(codec, fourccToString((int)container.get(CAP_PROP_FOURCC)));
EXPECT_EQ(pixelFormat, fourccToString((int)container.get(CAP_PROP_CODEC_PIXEL_FORMAT)));
std::ofstream file(fileNameOut, ios::out | ios::trunc | std::ios::binary);
size_t totalBytes = 0;
Mat raw_data;
while (true)
{
container >> raw_data;
size_t size = raw_data.total();
if (raw_data.empty())
break;
ASSERT_EQ(CV_8UC1, raw_data.type());
ASSERT_LE(raw_data.dims, 2);
ASSERT_EQ(raw_data.rows, 1);
ASSERT_EQ((size_t)raw_data.cols, raw_data.total());
ASSERT_TRUE(raw_data.isContinuous());
totalBytes += size;
file.write(reinterpret_cast<char*>(raw_data.data), size);
ASSERT_FALSE(file.fail());
}
ASSERT_GE(totalBytes, (size_t)65536) << "Encoded stream is too small";
}
std::cout << "Checking extracted video stream: " << fileNameOut << std::endl;
// Check decoded frames read from original media are equal to frames decoded from tmp file
{
VideoCapture capReference(findDataFile(fileName), api);
ASSERT_TRUE(capReference.isOpened());
VideoCapture capActual(fileNameOut.c_str(), api);
ASSERT_TRUE(capActual.isOpened());
Mat reference, actual;
int nframes = 0, n_err = 0;
while (capReference.read(reference) && n_err < 3)
{
nframes++;
ASSERT_TRUE(capActual.read(actual)) << nframes;
EXPECT_EQ(0, cvtest::norm(actual, reference, NORM_INF)) << nframes << " err=" << ++n_err;
}
ASSERT_GT(nframes, 0);
}
ASSERT_EQ(0, remove(fileNameOut.c_str()));
}
const videoio_container_params_t videoio_container_params[] =
{
make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264", "h264", "h264", "I420"),
make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265", "h265", "hevc", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mkv", "mkv.h264", "h264", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mkv", "mkv.h265", "hevc", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mp4", "mp4.avc1", "avc1", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mp4", "mp4.hev1", "hev1", "I420"),
};
INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));
//==========================================================================
static void generateFrame(Mat &frame, unsigned int i, const Point &center, const Scalar &color)
{
