Merge pull request #15290 from cudawarped:ffmpeg_raw_retrieve

Add retrieve encoded frame to VideoCapture

* Add the ability to retrieve the encoded frame from a VideoCapture object.

* Correct raw codec and pixel format output from FFmpeg capture.

* Remove warnings from build.

* Added VideoCaptureRaw subclass.

* Include abstract base class VideoCaptureBase and rename the new subclass to VideoContainer, as suggested by mshabunin.

* Remove using.

* Change base class name for compatibility with the Java bindings generator.

* Move grab and retrieve, and add the override specifier.

* Add setRaw and readRaw to the IVideoCapture interface
- setRaw disables video decoding and enables the bitstream filters from mp4 to h264 and h265.
- readRaw returns the raw, undecoded/filtered bitstream.
Add createRawCapture to initiate a backend with setRaw enabled.
Remove inheritance and use an independent VideoContainer subclass with an IVideoCapture member.

* Address unused parameter warnings.
Remove VideoContainer from the Python bindings as it no longer returns a Mat.
Use the OpenCV type uchar instead of unsigned char.
Add the missing destructor to the VideoContainer class.

* Address build warnings and include all params in documentation.

* Include deprecated bitstream filtering API.

* Update the codec_id query to work with older FFmpeg APIs.
Change the API version defines to be consistent: most recent API version first.

* Fix typo.

* Update test to work with naming of new files in the extra repo

* Investigate test failure

* Check bytes read by ffmpeg

* Removed mp4 video container test

* Applied suggested changes.

* videoio: rework API for extraction of RAW video streams

- FFmpeg only

* address review comments
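
For reference, a minimal usage sketch of the reworked API as exercised by the new test: decoding is switched off with CAP_PROP_FORMAT = -1, after which each read() yields one undecoded packet as a 1xN CV_8UC1 Mat. The input and output file names here are illustrative.

```cpp
#include <fstream>
#include <opencv2/videoio.hpp>

int main()
{
    // Open the container with the FFmpeg backend and request RAW mode
    // before grabbing any frames ("video.h264" is an illustrative input).
    cv::VideoCapture cap("video.h264", cv::CAP_FFMPEG);
    if (!cap.isOpened())
        return 1;
    if (!cap.set(cv::CAP_PROP_FORMAT, -1))   // RAW extraction not supported
        return 1;

    std::ofstream out("stream.bin", std::ios::out | std::ios::binary);
    cv::Mat raw;                              // 1 x packetSize, CV_8UC1
    while (cap.read(raw))
        out.write(reinterpret_cast<const char*>(raw.data), (std::streamsize)raw.total());
    return 0;
}
```
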
pull/15753/head
Authored by cudawarped 5 years ago, committed by Alexander Alekhin
parent af230ec133
commit 0867e3188d
  1. modules/videoio/include/opencv2/videoio.hpp (4)
  2. modules/videoio/src/cap_ffmpeg_impl.hpp (191)
  3. modules/videoio/test/test_ffmpeg.cpp (84)

@@ -137,7 +137,8 @@ enum VideoCaptureProperties {
CAP_PROP_FPS =5, //!< Frame rate.
CAP_PROP_FOURCC =6, //!< 4-character code of codec. see VideoWriter::fourcc .
CAP_PROP_FRAME_COUNT =7, //!< Number of frames in the video file.
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects returned by VideoCapture::retrieve().
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
//!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
CAP_PROP_MODE =9, //!< Backend-specific value indicating the current capture mode.
CAP_PROP_BRIGHTNESS =10, //!< Brightness of the image (only for those cameras that support).
CAP_PROP_CONTRAST =11, //!< Contrast of the image (only for cameras).
@@ -174,6 +175,7 @@ enum VideoCaptureProperties {
CAP_PROP_CHANNEL =43, //!< Video input or Channel Number (only for those cameras that support)
CAP_PROP_AUTO_WB =44, //!< enable/ disable auto white-balance
CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
CAP_PROP_CODEC_PIXEL_FORMAT =46, //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
#ifndef CV_DOXYGEN
CV__CAP_PROP_LATEST
#endif
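
The new CAP_PROP_CODEC_PIXEL_FORMAT property is returned as a FOURCC value (or -1 when unknown); a small caller-side sketch for unpacking it into text (the helper and function names are illustrative, the test further below uses its own fourccToString):

```cpp
#include <iostream>
#include <string>
#include <opencv2/videoio.hpp>

// Illustrative helper: unpack a FOURCC integer into its 4-character form.
static std::string fourccToText(int fourcc)
{
    return std::string{ char(fourcc & 255), char((fourcc >> 8) & 255),
                        char((fourcc >> 16) & 255), char((fourcc >> 24) & 255) };
}

static void printCodecInfo(cv::VideoCapture& cap)  // cap: an opened FFmpeg-backed capture
{
    int codec  = (int)cap.get(cv::CAP_PROP_FOURCC);
    int pixFmt = (int)cap.get(cv::CAP_PROP_CODEC_PIXEL_FORMAT);
    std::cout << "codec: " << fourccToText(codec) << std::endl;
    if (pixFmt != -1)
        std::cout << "pixel format: " << fourccToText(pixFmt) << std::endl;  // e.g. "I420"
}
```
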

@@ -531,6 +531,17 @@ struct CvCapture_FFMPEG
#if USE_AV_INTERRUPT_CALLBACK
AVInterruptCallbackMetadata interrupt_metadata;
#endif
bool setRaw();
bool processRawPacket();
bool rawMode;
bool rawModeInitialized;
AVPacket packet_filtered;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVBSFContext* bsfc;
#else
AVBitStreamFilterContext* bsfc;
#endif
};
void CvCapture_FFMPEG::init()
@@ -555,6 +566,12 @@ void CvCapture_FFMPEG::init()
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
dict = NULL;
#endif
rawMode = false;
rawModeInitialized = false;
memset(&packet_filtered, 0, sizeof(packet_filtered));
av_init_packet(&packet_filtered);
bsfc = NULL;
}
@@ -623,6 +640,21 @@ void CvCapture_FFMPEG::close()
av_dict_free(&dict);
#endif
if (packet_filtered.data)
{
_opencv_ffmpeg_av_packet_unref(&packet_filtered);
packet_filtered.data = NULL;
}
if (bsfc)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
av_bsf_free(&bsfc);
#else
av_bitstream_filter_close(bsfc);
#endif
}
init();
}
@@ -997,6 +1029,116 @@ exit_func:
return valid;
}
bool CvCapture_FFMPEG::setRaw()
{
if (!rawMode)
{
if (frame_number != 0)
{
CV_WARN("Incorrect usage: do not grab frames before .set(CAP_PROP_FORMAT, -1)");
}
// binary stream filter creation is moved into processRawPacket()
rawMode = true;
}
return true;
}
bool CvCapture_FFMPEG::processRawPacket()
{
if (packet.data == NULL) // EOF
return false;
if (!rawModeInitialized)
{
rawModeInitialized = true;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
#elif LIBAVFORMAT_BUILD > 4628
AVCodecID eVideoCodec = video_st->codec->codec_id;
#else
AVCodecID eVideoCodec = video_st->codec.codec_id;
#endif
const char* filterName = NULL;
if (eVideoCodec == CV_CODEC(CODEC_ID_H264) || eVideoCodec == CV_CODEC(CODEC_ID_H265))
{
// check start code prefixed mode (as defined in the Annex B H.264 / H.265 specification)
if (packet.size >= 5
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 0 && packet.data[3] == 1)
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 1)
)
{
filterName = eVideoCodec == CV_CODEC(CODEC_ID_H264) ? "h264_mp4toannexb" : "hevc_mp4toannexb";
}
}
if (filterName)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
const AVBitStreamFilter * bsf = av_bsf_get_by_name(filterName);
if (!bsf)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
int err = av_bsf_alloc(bsf, &bsfc);
if (err < 0)
{
CV_WARN("Error allocating context for bitstream buffer");
return false;
}
avcodec_parameters_copy(bsfc->par_in, ic->streams[video_stream]->codecpar);
err = av_bsf_init(bsfc);
if (err < 0)
{
CV_WARN("Error initializing bitstream buffer");
return false;
}
#else
bsfc = av_bitstream_filter_init(filterName);
if (!bsfc)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
#endif
}
}
if (bsfc)
{
if (packet_filtered.data)
{
av_packet_unref(&packet_filtered);
}
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
int err = av_bsf_send_packet(bsfc, &packet);
if (err < 0)
{
CV_WARN("Packet submission for filtering failed");
return false;
}
err = av_bsf_receive_packet(bsfc, &packet_filtered);
if (err < 0)
{
CV_WARN("Filtered packet retrieve failed");
return false;
}
#else
#if LIBAVFORMAT_BUILD > 4628
AVCodecContext* ctx = ic->streams[video_stream]->codec;
#else
AVCodecContext* ctx = &ic->streams[video_stream]->codec;
#endif
int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
&packet_filtered.size, packet.data, packet.size, packet_filtered.flags & AV_PKT_FLAG_KEY);
if (err < 0)
{
CV_WARN("Packet filtering failed");
return false;
}
#endif
return packet_filtered.data != NULL;
}
return packet.data != NULL;
}
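
processRawPacket() only wires in the h264_mp4toannexb / hevc_mp4toannexb filter when the first packet is not already start-code prefixed; a standalone sketch of that Annex B check, mirroring the condition above (the implementation additionally requires at least 5 bytes before testing):

```cpp
#include <cstddef>
#include <cstdint>

// Returns true if the buffer already begins with an Annex B start code
// (00 00 01 or 00 00 00 01), in which case no mp4-to-annexb filtering is needed.
static bool isAnnexB(const uint8_t* data, size_t size)
{
    if (size >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
        return true;
    if (size >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1)
        return true;
    return false;
}
```
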
bool CvCapture_FFMPEG::grabFrame()
{
@@ -1048,6 +1190,12 @@ bool CvCapture_FFMPEG::grabFrame()
continue;
}
if (rawMode)
{
valid = processRawPacket();
break;
}
// Decode video frame
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
@@ -1068,7 +1216,6 @@ bool CvCapture_FFMPEG::grabFrame()
if( picture_pts == AV_NOPTS_VALUE_ )
picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts;
frame_number++;
valid = true;
}
else
@@ -1079,7 +1226,10 @@ bool CvCapture_FFMPEG::grabFrame()
}
}
if( valid && first_frame_number < 0 )
if (valid)
frame_number++;
if (!rawMode && valid && first_frame_number < 0)
first_frame_number = dts_to_frame_number(picture_pts);
#if USE_AV_INTERRUPT_CALLBACK
@@ -1087,14 +1237,28 @@ bool CvCapture_FFMPEG::grabFrame()
interrupt_metadata.timeout_after_ms = 0;
#endif
// return if we have a new picture or not
// return if we have a new frame or not
return valid;
}
bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* width, int* height, int* cn)
{
if( !video_st || !picture->data[0] )
if (!video_st)
return false;
if (rawMode)
{
AVPacket& p = bsfc ? packet_filtered : packet;
*data = p.data;
*step = p.size;
*width = p.size;
*height = 1;
*cn = 1;
return p.data != NULL;
}
if (!picture->data[0])
return false;
if( img_convert_ctx == NULL ||
@@ -1163,7 +1327,6 @@ bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int*
return true;
}
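
Since raw mode hands each (optionally Annex B filtered) packet back as a 1xN CV_8UC1 Mat, a consumer can scan it directly; a hedged sketch that lists H.264 NAL unit types from such a buffer (the 0x1F mask follows the H.264 NAL header layout; H.265 headers differ):

```cpp
#include <cstdio>
#include <opencv2/core.hpp>

// Walk an Annex B H.264 buffer (as returned in raw mode) and print the
// nal_unit_type found after each 00 00 01 start code.
static void listNalUnits(const cv::Mat& raw)   // raw: 1 x N, CV_8UC1
{
    const uchar* p = raw.ptr<uchar>();
    const size_t n = raw.total();
    for (size_t i = 0; i + 3 < n; i++)
    {
        if (p[i] == 0 && p[i + 1] == 0 && p[i + 2] == 1)
        {
            std::printf("NAL unit type: %d\n", p[i + 3] & 0x1F);  // low 5 bits of the NAL header
            i += 3;  // skip past the start code
        }
    }
}
```
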
double CvCapture_FFMPEG::getProperty( int property_id ) const
{
if( !video_st ) return 0;
@@ -1217,6 +1380,20 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num;
case CAP_PROP_SAR_DEN:
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den;
case CAP_PROP_CODEC_PIXEL_FORMAT:
{
#if LIBAVFORMAT_BUILD > 4628
AVPixelFormat pix_fmt = video_st->codec->pix_fmt;
#else
AVPixelFormat pix_fmt = video_st->codec.pix_fmt;
#endif
unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt);
return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag;
}
case CAP_PROP_FORMAT:
if (rawMode)
return -1;
break;
default:
break;
}
@@ -1386,6 +1563,10 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value )
picture_pts=(int64_t)value;
}
break;
case CAP_PROP_FORMAT:
if (value == -1)
return setRaw();
return false;
default:
return false;
}

@@ -95,6 +95,90 @@ TEST(videoio_ffmpeg, image)
//==========================================================================
typedef tuple<VideoCaptureAPIs, string, string, string, string, string> videoio_container_params_t;
typedef testing::TestWithParam< videoio_container_params_t > videoio_container;
TEST_P(videoio_container, read)
{
const VideoCaptureAPIs api = get<0>(GetParam());
if (!videoio_registry::hasBackend(api))
throw SkipTestException("Backend was not found");
const string path = get<1>(GetParam());
const string ext = get<2>(GetParam());
const string ext_raw = get<3>(GetParam());
const string codec = get<4>(GetParam());
const string pixelFormat = get<5>(GetParam());
const string fileName = path + "." + ext;
const string fileNameOut = tempfile(cv::format("test_container_stream.%s", ext_raw.c_str()).c_str());
// Write encoded video read using VideoContainer to tmp file
{
VideoCapture container(findDataFile(fileName), api);
ASSERT_TRUE(container.isOpened());
if (!container.set(CAP_PROP_FORMAT, -1)) // turn off video decoder (extract stream)
throw SkipTestException("Fetching of RAW video streams is not supported");
ASSERT_EQ(-1.f, container.get(CAP_PROP_FORMAT)); // check
EXPECT_EQ(codec, fourccToString((int)container.get(CAP_PROP_FOURCC)));
EXPECT_EQ(pixelFormat, fourccToString((int)container.get(CAP_PROP_CODEC_PIXEL_FORMAT)));
std::ofstream file(fileNameOut, ios::out | ios::trunc | std::ios::binary);
size_t totalBytes = 0;
Mat raw_data;
while (true)
{
container >> raw_data;
size_t size = raw_data.total();
if (raw_data.empty())
break;
ASSERT_EQ(CV_8UC1, raw_data.type());
ASSERT_LE(raw_data.dims, 2);
ASSERT_EQ(raw_data.rows, 1);
ASSERT_EQ((size_t)raw_data.cols, raw_data.total());
ASSERT_TRUE(raw_data.isContinuous());
totalBytes += size;
file.write(reinterpret_cast<char*>(raw_data.data), size);
ASSERT_FALSE(file.fail());
}
ASSERT_GE(totalBytes, (size_t)65536) << "Encoded stream is too small";
}
std::cout << "Checking extracted video stream: " << fileNameOut << std::endl;
// Check decoded frames read from original media are equal to frames decoded from tmp file
{
VideoCapture capReference(findDataFile(fileName), api);
ASSERT_TRUE(capReference.isOpened());
VideoCapture capActual(fileNameOut.c_str(), api);
ASSERT_TRUE(capActual.isOpened());
Mat reference, actual;
int nframes = 0, n_err = 0;
while (capReference.read(reference) && n_err < 3)
{
nframes++;
ASSERT_TRUE(capActual.read(actual)) << nframes;
EXPECT_EQ(0, cvtest::norm(actual, reference, NORM_INF)) << nframes << " err=" << ++n_err;
}
ASSERT_GT(nframes, 0);
}
ASSERT_EQ(0, remove(fileNameOut.c_str()));
}
const videoio_container_params_t videoio_container_params[] =
{
make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264", "h264", "h264", "I420"),
make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265", "h265", "hevc", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mkv", "mkv.h264", "h264", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mkv", "mkv.h265", "hevc", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mp4", "mp4.avc1", "avc1", "I420"),
//make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mp4", "mp4.hev1", "hev1", "I420"),
};
INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));
//==========================================================================
static void generateFrame(Mat &frame, unsigned int i, const Point &center, const Scalar &color)
{
