From e4ad7e3778d0be868568085f0a1fc6b0d84d9cfe Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Wed, 19 Jul 2023 07:59:05 +0300 Subject: [PATCH] VideoCapture: remove decoder initialization when CAP_PROP_FORMAT== -1 (rawMode == true) --- modules/videoio/include/opencv2/videoio.hpp | 2 +- modules/videoio/src/cap_ffmpeg_impl.hpp | 105 +++++++++++++------- modules/videoio/test/test_ffmpeg.cpp | 30 ++++++ 3 files changed, 102 insertions(+), 35 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index dbed243b56..f40afbb4a2 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -140,7 +140,7 @@ enum VideoCaptureAPIs { */ enum VideoCaptureProperties { CAP_PROP_POS_MSEC =0, //!< Current position of the video file in milliseconds. - CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. + CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. When the index i is set in RAW mode (CAP_PROP_FORMAT == -1) this will seek to the key frame k, where k <= i. CAP_PROP_POS_AVI_RATIO =2, //!< Relative position of the video file: 0=start of the film, 1=end of the film. CAP_PROP_FRAME_WIDTH =3, //!< Width of the frames in the video stream. CAP_PROP_FRAME_HEIGHT =4, //!< Height of the frames in the video stream. 
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 982bc5c87d..e4431b323e 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -580,6 +580,7 @@ struct CvCapture_FFMPEG bool processRawPacket(); bool rawMode; bool rawModeInitialized; + bool rawSeek; bool convertRGB; AVPacket packet_filtered; #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100) @@ -633,6 +634,7 @@ void CvCapture_FFMPEG::init() rawMode = false; rawModeInitialized = false; + rawSeek = false; convertRGB = true; memset(&packet_filtered, 0, sizeof(packet_filtered)); av_init_packet(&packet_filtered); @@ -1051,33 +1053,35 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& return false; } } - if (params.has(CAP_PROP_HW_ACCELERATION)) - { - va_type = params.get(CAP_PROP_HW_ACCELERATION); -#if !USE_AV_HW_CODECS - if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) + if(!rawMode) { + if (params.has(CAP_PROP_HW_ACCELERATION)) { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); - return false; - } + va_type = params.get(CAP_PROP_HW_ACCELERATION); +#if !USE_AV_HW_CODECS + if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); + return false; + } #endif - } - if (params.has(CAP_PROP_HW_DEVICE)) - { - hw_device = params.get(CAP_PROP_HW_DEVICE); - if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. 
Bailout"); - return false; } - if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + if (params.has(CAP_PROP_HW_DEVICE)) { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. Bailout"); - return false; + hw_device = params.get(CAP_PROP_HW_DEVICE); + if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); + return false; + } + if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. Bailout"); + return false; + } + } + if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { + use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } - } - if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { - use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } #if USE_AV_INTERRUPT_CALLBACK if (params.has(CAP_PROP_OPEN_TIMEOUT_MSEC)) @@ -1153,6 +1157,23 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } + + if (rawMode) { + video_stream = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + if (video_stream < 0) { + close(); + return false; + } + video_st = ic->streams[video_stream]; +#ifndef CV_FFMPEG_CODECPAR + frame.height = video_st->codec->height; + frame.width = video_st->codec->width; +#else + frame.height = video_st->codecpar->height; + frame.width = video_st->codecpar->width; +#endif + return true; + } for(i = 0; i < ic->nb_streams; i++) { #ifndef CV_FFMPEG_CODECPAR @@ -1440,6 +1461,10 @@ bool CvCapture_FFMPEG::processRawPacket() bool CvCapture_FFMPEG::grabFrame() { + if (rawSeek) { + rawSeek = false; + return true; + } bool valid = false; static const size_t max_read_attempts = 
cv::utils::getConfigurationParameterSizeT("OPENCV_FFMPEG_READ_ATTEMPTS", 4096); @@ -1447,7 +1472,7 @@ bool CvCapture_FFMPEG::grabFrame() size_t cur_read_attempts = 0; size_t cur_decode_attempts = 0; - if( !ic || !video_st || !context ) return false; + if( !ic || !video_st || (!rawMode && !context) ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1464,7 +1489,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(context, picture) >= 0; + valid = rawMode ? false : avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1548,12 +1573,16 @@ bool CvCapture_FFMPEG::grabFrame() } if (valid) { - if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; - frame_number++; + if (picture_pts == AV_NOPTS_VALUE_) { + if (!rawMode) + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; + else + picture_pts = packet.pts != AV_NOPTS_VALUE_ && packet.pts != 0 ? 
packet.pts : packet.dts; + frame_number++; + } } - if (!rawMode && valid && first_frame_number < 0) + if (valid && first_frame_number < 0) first_frame_number = dts_to_frame_number(picture_pts); #if USE_AV_INTERRUPT_CALLBACK @@ -1567,7 +1596,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn, int* depth) { - if (!video_st || !context) + if (!video_st || (!rawMode && !context)) return false; if (rawMode || flag == extraDataIdx) @@ -1735,7 +1764,7 @@ static inline double getCodecIdFourcc(const AVCodecID codec_id) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st || !context ) return 0; + if( !video_st || (!rawMode && !context) ) return 0; switch( property_id ) { @@ -1814,7 +1843,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const //ic->start_time_realtime is in microseconds return ((double)ic->start_time_realtime); case CAP_PROP_N_THREADS: - return static_cast<double>(context->thread_count); + if (!rawMode) + return static_cast<double>(context->thread_count); default: break; } @@ -1910,9 +1940,11 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { - CV_Assert(context); + if (!rawMode) { + CV_Assert(context); + } _frame_number = std::min(_frame_number, get_total_frames()); - int delta = 16; + int delta = !rawMode ? 
16 : 0; // if we have not grabbed a single frame before first seek, let's read the first frame // and get some valuable information during the process @@ -1927,7 +1959,8 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(context); + if(!rawMode) + avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1935,6 +1968,10 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) if( _frame_number > 1 ) { frame_number = dts_to_frame_number(picture_pts) - first_frame_number; + if (rawMode) { + rawSeek = true; + break; + } //printf("_frame_number = %d, frame_number = %d, delta = %d\n", // (int)_frame_number, (int)frame_number, delta); diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 35d425d5c1..0496b8c369 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -476,6 +476,16 @@ static void ffmpeg_check_read_raw(VideoCapture& cap) EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; EXPECT_EQ((size_t)37118, data.total()); + +#ifndef WIN32 + // 12 is the nearest key frame to frame 18 + EXPECT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 18.)); + EXPECT_EQ(cap.get(CAP_PROP_POS_FRAMES), 12.); + cap >> data; + EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); + EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; + EXPECT_EQ((size_t)8726, data.total()); +#endif } TEST(videoio_ffmpeg, ffmpeg_check_extra_data) @@ -506,6 +516,16 @@ TEST(videoio_ffmpeg, open_with_property) CAP_PROP_FORMAT, -1 // demux only })); + // confirm properties are returned without initializing AVCodecContext + EXPECT_EQ(cap.get(CAP_PROP_FORMAT), -1); + 
EXPECT_EQ(static_cast<int>(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } @@ -519,6 +539,16 @@ TEST(videoio_ffmpeg, create_with_property) CAP_PROP_FORMAT, -1 // demux only }); + // confirm properties are returned without initializing AVCodecContext + EXPECT_TRUE(cap.get(CAP_PROP_FORMAT) == -1); + EXPECT_EQ(static_cast<int>(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); }