diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp
index 890e854714..999643d241 100644
--- a/modules/videoio/include/opencv2/videoio.hpp
+++ b/modules/videoio/include/opencv2/videoio.hpp
@@ -137,7 +137,8 @@ enum VideoCaptureProperties {
        CAP_PROP_FPS =5, //!< Frame rate.
        CAP_PROP_FOURCC =6, //!< 4-character code of codec. see VideoWriter::fourcc .
        CAP_PROP_FRAME_COUNT =7, //!< Number of frames in the video file.
-       CAP_PROP_FORMAT =8, //!< Format of the %Mat objects returned by VideoCapture::retrieve().
+       CAP_PROP_FORMAT =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
+                           //!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
        CAP_PROP_MODE =9, //!< Backend-specific value indicating the current capture mode.
        CAP_PROP_BRIGHTNESS =10, //!< Brightness of the image (only for those cameras that support).
        CAP_PROP_CONTRAST =11, //!< Contrast of the image (only for cameras).
@@ -174,6 +175,7 @@ enum VideoCaptureProperties {
        CAP_PROP_CHANNEL =43, //!< Video input or Channel Number (only for those cameras that support)
        CAP_PROP_AUTO_WB =44, //!< enable/ disable auto white-balance
        CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
+       CAP_PROP_CODEC_PIXEL_FORMAT =46, //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
 #ifndef CV_DOXYGEN
        CV__CAP_PROP_LATEST
 #endif
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index c6d2efd665..1101c72311 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -531,6 +531,18 @@ struct CvCapture_FFMPEG
 #if USE_AV_INTERRUPT_CALLBACK
     AVInterruptCallbackMetadata interrupt_metadata;
 #endif
+
+    // Raw (undecoded) stream extraction, enabled through setProperty(CAP_PROP_FORMAT, -1).
+    bool setRaw();
+    bool processRawPacket();
+    bool rawMode;               // true: grabFrame() skips decoding, retrieveFrame() returns packet bytes
+    bool rawModeInitialized;    // true once processRawPacket() has attempted bitstream filter setup
+    AVPacket packet_filtered;   // bitstream filter output; returned instead of 'packet' when bsfc is set
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+    AVBSFContext* bsfc;         // bitstream filter context; NULL when packets are passed through as is
+#else
+    AVBitStreamFilterContext* bsfc;
+#endif
 };
 
 void CvCapture_FFMPEG::init()
@@ -555,6 +567,12 @@ void CvCapture_FFMPEG::init()
 #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
     dict = NULL;
 #endif
+
+    rawMode = false;
+    rawModeInitialized = false;
+    memset(&packet_filtered, 0, sizeof(packet_filtered));
+    av_init_packet(&packet_filtered);
+    bsfc = NULL;
 }
 
 
@@ -623,6 +641,21 @@ void CvCapture_FFMPEG::close()
     av_dict_free(&dict);
 #endif
 
+    if (packet_filtered.data)
+    {
+        _opencv_ffmpeg_av_packet_unref(&packet_filtered);
+        packet_filtered.data = NULL;
+    }
+
+    if (bsfc)
+    {
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        av_bsf_free(&bsfc);
+#else
+        av_bitstream_filter_close(bsfc);
+#endif
+    }
+
     init();
 }
 
@@ -997,6 +1030,127 @@ exit_func:
     return valid;
 }
 
+// Switches the capture into raw mode: grabFrame() stops decoding and
+// retrieveFrame() returns the encoded packet bytes instead of a picture.
+// Always returns true; only warns when frames were already grabbed.
+bool CvCapture_FFMPEG::setRaw()
+{
+    if (!rawMode)
+    {
+        if (frame_number != 0)
+        {
+            CV_WARN("Incorrect usage: do not grab frames before .set(CAP_PROP_FORMAT, -1)");
+        }
+        // binary stream filter creation is moved into processRawPacket()
+        rawMode = true;
+    }
+    return true;
+}
+
+// Prepares the current 'packet' for retrieveFrame() in raw mode.
+// On the first packet, H.264/H.265 data that does not start with an Annex B
+// start code gets a "*_mp4toannexb" bitstream filter; later packets reuse it.
+// Returns false on EOF (no packet data) or on any filter failure.
+bool CvCapture_FFMPEG::processRawPacket()
+{
+    if (packet.data == NULL)  // EOF
+        return false;
+    if (!rawModeInitialized)
+    {
+        rawModeInitialized = true;  // filter setup is attempted once, even if it fails below
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
+#elif LIBAVFORMAT_BUILD > 4628
+        AVCodecID eVideoCodec = video_st->codec->codec_id;
+#else
+        AVCodecID eVideoCodec = video_st->codec.codec_id;
+#endif
+        const char* filterName = NULL;
+        if (eVideoCodec == CV_CODEC(CODEC_ID_H264) || eVideoCodec == CV_CODEC(CODEC_ID_H265))
+        {
+            // check start code prefixed mode (as defined in the Annex B H.264 / H.265 specification)
+            if (packet.size >= 5
+                 && !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 0 && packet.data[3] == 1)
+                 && !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 1)
+            )
+            {
+                filterName = eVideoCodec == CV_CODEC(CODEC_ID_H264) ? "h264_mp4toannexb" : "hevc_mp4toannexb";
+            }
+        }
+        if (filterName)
+        {
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+            const AVBitStreamFilter * bsf = av_bsf_get_by_name(filterName);
+            if (!bsf)
+            {
+                CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
+                return false;
+            }
+            int err = av_bsf_alloc(bsf, &bsfc);
+            if (err < 0)
+            {
+                CV_WARN("Error allocating context for bitstream buffer");
+                return false;
+            }
+            err = avcodec_parameters_copy(bsfc->par_in, ic->streams[video_stream]->codecpar);
+            if (err >= 0)
+                err = av_bsf_init(bsfc);
+            if (err < 0)
+            {
+                CV_WARN("Error initializing bitstream buffer");
+                // do not keep a half-initialized context: later packets would be sent to it
+                av_bsf_free(&bsfc);
+                return false;
+            }
+#else
+            bsfc = av_bitstream_filter_init(filterName);
+            if (!bsfc)
+            {
+                CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
+                return false;
+            }
+#endif
+        }
+    }
+    if (bsfc)
+    {
+        if (packet_filtered.data)  // release the output of the previous grab
+        {
+            _opencv_ffmpeg_av_packet_unref(&packet_filtered);
+        }
+
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        int err = av_bsf_send_packet(bsfc, &packet);
+        if (err < 0)
+        {
+            CV_WARN("Packet submission for filtering failed");
+            return false;
+        }
+        err = av_bsf_receive_packet(bsfc, &packet_filtered);
+        if (err < 0)
+        {
+            CV_WARN("Filtered packet retrieve failed");
+            return false;
+        }
+#else
+#if LIBAVFORMAT_BUILD > 4628
+        AVCodecContext* ctx = ic->streams[video_stream]->codec;
+#else
+        AVCodecContext* ctx = &ic->streams[video_stream]->codec;
+#endif
+        // the keyframe flag must describe the input packet, not the (still empty) output
+        int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
+            &packet_filtered.size, packet.data, packet.size, packet.flags & AV_PKT_FLAG_KEY);
+        if (err < 0)
+        {
+            CV_WARN("Packet filtering failed");
+            return false;
+        }
+#endif
+        return packet_filtered.data != NULL;
+    }
+    return packet.data != NULL;
+}
 
 bool CvCapture_FFMPEG::grabFrame()
 {
@@ -1048,6 +1202,12 @@ bool CvCapture_FFMPEG::grabFrame()
             continue;
         }
 
+        if (rawMode)  // expose the encoded packet as is, skip decoding
+        {
+            valid = processRawPacket();
+            break;
+        }
+
         // Decode video frame
         #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
             avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
@@ -1068,7 +1228,6 @@ bool CvCapture_FFMPEG::grabFrame()
             if( picture_pts == AV_NOPTS_VALUE_ )
                 picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts;
 
-            frame_number++;
             valid = true;
         }
         else
@@ -1079,7 +1238,10 @@ bool CvCapture_FFMPEG::grabFrame()
         }
     }
 
-    if( valid && first_frame_number < 0 )
+    if (valid)
+        frame_number++;
+
+    if (!rawMode && valid && first_frame_number < 0)
         first_frame_number = dts_to_frame_number(picture_pts);
 
 #if USE_AV_INTERRUPT_CALLBACK
@@ -1087,14 +1249,28 @@ bool CvCapture_FFMPEG::grabFrame()
     interrupt_metadata.timeout_after_ms = 0;
 #endif
 
-    // return if we have a new picture or not
+    // return if we have a new frame or not
     return valid;
 }
 
 
 bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* width, int* height, int* cn)
 {
-    if( !video_st || !picture->data[0] )
+    if (!video_st)
+        return false;
+
+    if (rawMode)
+    {
+        AVPacket& p = bsfc ? packet_filtered : packet;  // filtered output when a bitstream filter is active
+        *data = p.data;
+        *step = p.size;   // the packet is reported as a single row of p.size bytes
+        *width = p.size;
+        *height = 1;
+        *cn = 1;
+        return p.data != NULL;
+    }
+
+    if (!picture->data[0])
         return false;
 
     if( img_convert_ctx == NULL ||
@@ -1163,7 +1339,6 @@ bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int*
     return true;
 }
 
-
 double CvCapture_FFMPEG::getProperty( int property_id ) const
 {
     if( !video_st ) return 0;
@@ -1217,6 +1392,20 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
         return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num;
     case CAP_PROP_SAR_DEN:
         return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den;
+    case CAP_PROP_CODEC_PIXEL_FORMAT:
+    {
+#if LIBAVFORMAT_BUILD > 4628
+        AVPixelFormat pix_fmt = video_st->codec->pix_fmt;
+#else
+        AVPixelFormat pix_fmt = video_st->codec.pix_fmt;
+#endif
+        unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt);
+        return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag;  // tag 0 is reported as -1 (unknown)
+    }
+    case CAP_PROP_FORMAT:
+        if (rawMode)
+            return -1;
+        break;
     default:
         break;
     }
@@ -1386,6 +1575,10 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value )
             picture_pts=(int64_t)value;
         }
         break;
+    case CAP_PROP_FORMAT:
+        if (value == -1)  // only switching to raw mode is supported here
+            return setRaw();
+        return false;
     default:
         return false;
     }
diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp
index e581cb5996..bdff1b7ac2 100644
--- a/modules/videoio/test/test_ffmpeg.cpp
+++ b/modules/videoio/test/test_ffmpeg.cpp
@@ -95,6 +95,91 @@ TEST(videoio_ffmpeg, image)
 
 //==========================================================================
 
+// Parameters: backend, media path (no extension), container extension, raw stream extension, expected FOURCC, expected codec pixel format
+typedef tuple<VideoCaptureAPIs, string, string, string, string, string> videoio_container_params_t;
+typedef testing::TestWithParam< videoio_container_params_t > videoio_container;
+
+TEST_P(videoio_container, read)
+{
+    const VideoCaptureAPIs api = get<0>(GetParam());
+
+    if (!videoio_registry::hasBackend(api))
+        throw SkipTestException("Backend was not found");
+
+    const string path = get<1>(GetParam());
+    const string ext = get<2>(GetParam());
+    const string ext_raw = get<3>(GetParam());
+    const string codec = get<4>(GetParam());
+    const string pixelFormat = get<5>(GetParam());
+    const string fileName = path + "." + ext;
+    const string fileNameOut = tempfile(cv::format("test_container_stream.%s", ext_raw.c_str()).c_str());
+
+    // Write encoded video read using VideoContainer to tmp file
+    {
+        VideoCapture container(findDataFile(fileName), api);
+        ASSERT_TRUE(container.isOpened());
+        if (!container.set(CAP_PROP_FORMAT, -1))  // turn off video decoder (extract stream)
+            throw SkipTestException("Fetching of RAW video streams is not supported");
+        ASSERT_EQ(-1.f, container.get(CAP_PROP_FORMAT));  // check
+        EXPECT_EQ(codec, fourccToString((int)container.get(CAP_PROP_FOURCC)));
+        EXPECT_EQ(pixelFormat, fourccToString((int)container.get(CAP_PROP_CODEC_PIXEL_FORMAT)));
+
+        std::ofstream file(fileNameOut, ios::out | ios::trunc | std::ios::binary);
+        size_t totalBytes = 0;
+        Mat raw_data;
+        while (true)
+        {
+            container >> raw_data;
+            size_t size = raw_data.total();
+            if (raw_data.empty())
+                break;
+            ASSERT_EQ(CV_8UC1, raw_data.type());
+            ASSERT_LE(raw_data.dims, 2);
+            ASSERT_EQ(raw_data.rows, 1);
+            ASSERT_EQ((size_t)raw_data.cols, raw_data.total());
+            ASSERT_TRUE(raw_data.isContinuous());
+            totalBytes += size;
+            file.write(reinterpret_cast<char*>(raw_data.data), size);
+            ASSERT_FALSE(file.fail());
+        }
+        ASSERT_GE(totalBytes, (size_t)65536) << "Encoded stream is too small";
+    }
+
+    std::cout << "Checking extracted video stream: " << fileNameOut << std::endl;
+
+    // Check decoded frames read from original media are equal to frames decoded from tmp file
+    {
+        VideoCapture capReference(findDataFile(fileName), api);
+        ASSERT_TRUE(capReference.isOpened());
+        VideoCapture capActual(fileNameOut.c_str(), api);
+        ASSERT_TRUE(capActual.isOpened());
+        Mat reference, actual;
+        int nframes = 0, n_err = 0;
+        while (capReference.read(reference) && n_err < 3)
+        {
+            nframes++;
+            ASSERT_TRUE(capActual.read(actual)) << nframes;
+            EXPECT_EQ(0, cvtest::norm(actual, reference, NORM_INF)) << nframes << " err=" << ++n_err;
+        }
+        ASSERT_GT(nframes, 0);
+    }
+
+    ASSERT_EQ(0, remove(fileNameOut.c_str()));
+}
+
+const videoio_container_params_t videoio_container_params[] =
+{
+    make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264", "h264", "h264", "I420"),
+    make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265", "h265", "hevc", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mkv", "mkv.h264", "h264", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mkv", "mkv.h265", "hevc", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mp4", "mp4.avc1", "avc1", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mp4", "mp4.hev1", "hev1", "I420"),
+};
+
+INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));
+
+//==========================================================================
 
 static void generateFrame(Mat &frame, unsigned int i, const Point &center, const Scalar &color)
 {