cudacodec: update VideoWriter to correctly set the pts inside the encapsulated video stream

pull/3784/head
cudawarped 8 months ago
parent 9bfa192e42
commit 234297d452
  1. 9
      modules/cudacodec/include/opencv2/cudacodec.hpp
  2. 16
      modules/cudacodec/src/NvEncoder.cpp
  3. 7
      modules/cudacodec/src/NvEncoder.h
  4. 41
      modules/cudacodec/src/video_writer.cpp
  5. 15
      modules/cudacodec/test/test_video.cpp

@ -208,8 +208,15 @@ public:
/** @brief Callback function to signal that the encoded bitstream for one or more frames is ready.
@param vPacket The raw bitstream for one or more frames.
@param pts Presentation timestamps for each frame in vPacket using the FPS time base. e.g. fps = 25, pts = 3, presentation time = 3/25 seconds.
*/
virtual void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket) = 0;
virtual void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket, const std::vector<uint64_t>& pts) = 0;
/** @brief Set the GOP pattern used by the encoder.
@param frameIntervalP Specify the GOP pattern as follows : \p frameIntervalP = 0: I, 1 : IPP, 2 : IBP, 3 : IBBP.
*/
virtual bool setFrameIntervalP(const int frameIntervalP) = 0;
/** @brief Callback function to signal that the encoding has finished.
* */

@ -305,9 +305,10 @@ void NvEncoder::MapResources(uint32_t bfrIdx)
m_vMappedInputBuffers[bfrIdx] = mapInputResource.mappedResource;
}
void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, NV_ENC_PIC_PARAMS* pPicParams)
void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps, NV_ENC_PIC_PARAMS* pPicParams)
{
vPacket.clear();
outputTimeStamps.clear();
if (!IsHWEncoderInitialized())
{
NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
@ -322,7 +323,7 @@ void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, NV_ENC_P
if (nvStatus == NV_ENC_SUCCESS || nvStatus == NV_ENC_ERR_NEED_MORE_INPUT)
{
m_iToSend++;
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true);
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, outputTimeStamps, true);
}
else
{
@ -353,6 +354,7 @@ NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR
{
picParams = *pPicParams;
}
picParams.inputTimeStamp = m_iInputFrame++;
picParams.version = NV_ENC_PIC_PARAMS_VER;
picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
picParams.inputBuffer = inputBuffer;
@ -376,7 +378,7 @@ void NvEncoder::SendEOS()
NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams));
}
void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>>& vPacket)
void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps)
{
vPacket.clear();
if (!IsHWEncoderInitialized())
@ -386,10 +388,10 @@ void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>>& vPacket)
SendEOS();
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, false);
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, outputTimeStamps, false);
}
void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, bool bOutputDelay)
void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps, bool bOutputDelay)
{
unsigned i = 0;
int iEnd = bOutputDelay ? m_iToSend - m_nOutputDelay : m_iToSend;
@ -402,6 +404,7 @@ void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer,
lockBitstreamData.doNotWait = false;
NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData));
outputTimeStamps.push_back(lockBitstreamData.outputTimeStamp);
uint8_t* pData = (uint8_t*)lockBitstreamData.bitstreamBufferPtr;
if (vPacket.size() < i + 1)
{
@ -499,7 +502,8 @@ void NvEncoder::FlushEncoder()
try
{
std::vector<std::vector<uint8_t>> vPacket;
EndEncode(vPacket);
std::vector<uint64_t> outputTimeStamps;
EndEncode(vPacket, outputTimeStamps);
}
catch (...)
{

@ -100,7 +100,7 @@ public:
* data, which has been copied to an input buffer obtained from the
* GetNextInputFrame() function.
*/
virtual void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, NV_ENC_PIC_PARAMS* pPicParams = nullptr);
virtual void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps, NV_ENC_PIC_PARAMS* pPicParams = nullptr);
/**
* @brief This function is used to flush the encoder queue.
@ -109,7 +109,7 @@ public:
* from the encoder. The application must call this function before destroying
* an encoder session.
*/
virtual void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
virtual void EndEncode(std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps);
/**
* @brief This function is used to query hardware encoder capabilities.
@ -317,7 +317,7 @@ private:
* This is called by DoEncode() function. If there is buffering enabled,
* this may return without any output data.
*/
void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, bool bOutputDelay);
void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, std::vector<uint64_t>& outputTimeStamps, bool bOutputDelay);
/**
* @brief This is a private function which is used to initialize the bitstream buffers.
@ -369,6 +369,7 @@ protected:
int32_t m_iGot = 0;
int32_t m_nEncoderBuffer = 0;
int32_t m_nOutputDelay = 0;
int32_t m_iInputFrame = 0;
private:
void* m_pDevice;

@ -53,6 +53,10 @@ Ptr<cudacodec::VideoWriter> createVideoWriter(const String&, const Size, const C
#else // !defined HAVE_NVCUVENC
#if defined(WIN32) // remove when FFmpeg wrapper includes PR25874
#define WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE
#endif
NV_ENC_BUFFER_FORMAT EncBufferFormat(const ColorFormat colorFormat);
int NChannels(const ColorFormat colorFormat);
GUID CodecGuid(const Codec codec);
@ -72,8 +76,9 @@ class FFmpegVideoWriter : public EncoderCallback
public:
FFmpegVideoWriter(const String& fileName, const Codec codec, const int fps, const Size sz, const int idrPeriod);
~FFmpegVideoWriter();
void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket);
void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket, const std::vector<uint64_t>& pts);
void onEncodingFinished();
bool setFrameIntervalP(const int frameIntervalP);
private:
cv::VideoWriter writer;
};
@ -95,21 +100,32 @@ FFmpegVideoWriter::~FFmpegVideoWriter() {
onEncodingFinished();
}
void FFmpegVideoWriter::onEncoded(const std::vector<std::vector<uint8_t>>& vPacket) {
for (auto& packet : vPacket) {
/** @brief Write each encoded packet to the wrapped cv::VideoWriter together with its presentation timestamp.
@param vPacket Encoded bitstream packets; each one is wrapped as a 1 x N CV_8UC1 Mat and written in order.
@param pts Presentation timestamp for each entry of vPacket; must contain the same number of elements as vPacket.
*/
void FFmpegVideoWriter::onEncoded(const std::vector<std::vector<uint8_t>>& vPacket, const std::vector<uint64_t>& pts) {
    CV_Assert(vPacket.size() == pts.size());
    for (size_t i = 0; i < vPacket.size(); i++) {
        // Wrap the caller's buffer directly instead of deep-copying every packet; the Mat is only
        // handed to writer.write() below and does not outlive this iteration.
        const std::vector<uint8_t>& packet = vPacket[i];
        Mat wrappedPacket(1, static_cast<int>(packet.size()), CV_8UC1, const_cast<uint8_t*>(packet.data()));
        // The writer property is set as a double, so reject any pts that does not survive the round trip.
        const double ptsDouble = static_cast<double>(pts[i]);
        CV_Assert(static_cast<uint64_t>(ptsDouble) == pts[i]);
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
        CV_Assert(writer.set(VIDEOWRITER_PROP_PTS, ptsDouble));
#endif
        writer.write(wrappedPacket);
    }
}
/** @brief Forward the encoder GOP pattern to the wrapped cv::VideoWriter.
@param frameIntervalP GOP pattern value; VIDEOWRITER_PROP_DTS_DELAY is set to frameIntervalP - 1.
@return The value returned by the writer's set() call.
*/
bool FFmpegVideoWriter::setFrameIntervalP(const int frameIntervalP) {
    const double dtsDelay = static_cast<double>(frameIntervalP - 1);
    return writer.set(VIDEOWRITER_PROP_DTS_DELAY, dtsDelay);
}
class RawVideoWriter : public EncoderCallback
{
public:
RawVideoWriter(const String fileName);
~RawVideoWriter();
void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket);
void onEncoded(const std::vector<std::vector<uint8_t>>& vPacket, const std::vector<uint64_t>& pts);
void onEncodingFinished();
bool setFrameIntervalP(const int) { return false;}
private:
std::ofstream fpOut;
};
@ -128,7 +144,7 @@ RawVideoWriter::~RawVideoWriter() {
onEncodingFinished();
}
void RawVideoWriter::onEncoded(const std::vector<std::vector<uint8_t>>& vPacket) {
/** @brief Append every encoded packet to the output file stream, in order.
@param vPacket Encoded bitstream packets to write.
The second (timestamp) argument is unnamed and not used by this writer.
*/
void RawVideoWriter::onEncoded(const std::vector<std::vector<uint8_t>>& vPacket, const std::vector<uint64_t>&) {
    for (const std::vector<uint8_t>& bitstream : vPacket) {
        const char* const bytes = reinterpret_cast<const char*>(bitstream.data());
        fpOut.write(bytes, bitstream.size());
    }
}
@ -208,8 +224,9 @@ VideoWriterImpl::VideoWriterImpl(const Ptr<EncoderCallback>& encoderCallBack_, c
}
void VideoWriterImpl::release() {
pEnc->EndEncode(vPacket);
encoderCallback->onEncoded(vPacket);
std::vector<uint64_t> pts;
pEnc->EndEncode(vPacket, pts);
encoderCallback->onEncoded(vPacket, pts);
encoderCallback->onEncodingFinished();
}
@ -316,6 +333,11 @@ void VideoWriterImpl::InitializeEncoder(const GUID codec, const double fps)
initializeParams.encodeConfig->rcParams.maxBitRate = encoderParams.maxBitRate;
initializeParams.encodeConfig->rcParams.targetQuality = encoderParams.targetQuality;
initializeParams.encodeConfig->gopLength = encoderParams.gopLength;
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
if (initializeParams.encodeConfig->frameIntervalP > 1) {
CV_Assert(encoderCallback->setFrameIntervalP(initializeParams.encodeConfig->frameIntervalP));
}
#endif
if (codec == NV_ENC_CODEC_H264_GUID)
initializeParams.encodeConfig->encodeCodecConfig.h264Config.idrPeriod = encoderParams.idrPeriod;
else if (codec == NV_ENC_CODEC_HEVC_GUID)
@ -383,8 +405,9 @@ void VideoWriterImpl::CopyToNvSurface(const InputArray src)
void VideoWriterImpl::write(const InputArray frame) {
CV_Assert(frame.channels() == nSrcChannels);
CopyToNvSurface(frame);
pEnc->EncodeFrame(vPacket);
encoderCallback->onEncoded(vPacket);
std::vector<uint64_t> pts;
pEnc->EncodeFrame(vPacket, pts);
encoderCallback->onEncoded(vPacket, pts);
};
EncoderParams VideoWriterImpl::getEncoderParams() const {

@ -650,6 +650,9 @@ struct TransCode : testing::TestWithParam<cv::cuda::DeviceInfo>
}
};
#if defined(WIN32) // remove when FFmpeg wrapper includes PR25874
#define WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE
#endif
CUDA_TEST_P(TransCode, H264ToH265)
{
@ -691,6 +694,10 @@ CUDA_TEST_P(TransCode, H264ToH265)
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
const int pts = static_cast<int>(cap.get(CAP_PROP_PTS));
ASSERT_EQ(i, pts > 0 ? pts : 0); // FFmpeg back end returns dts if pts is zero.
#endif
}
}
ASSERT_EQ(0, remove(outputFile.c_str()));
@ -773,6 +780,10 @@ CUDA_TEST_P(Write, Writer)
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
const int pts = static_cast<int>(cap.get(CAP_PROP_PTS));
ASSERT_EQ(i, pts > 0 ? pts : 0); // FFmpeg back end returns dts if pts is zero.
#endif
}
}
ASSERT_EQ(0, remove(outputFile.c_str()));
@ -867,6 +878,10 @@ CUDA_TEST_P(EncoderParams, Writer)
const bool keyFrameActual = capRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME) == 1.0;
const bool keyFrameReference = i % idrPeriod == 0;
ASSERT_EQ(keyFrameActual, keyFrameReference);
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
const int pts = static_cast<int>(cap.get(CAP_PROP_PTS));
ASSERT_EQ(i, pts > 0 ? pts : 0); // FFmpeg back end returns dts if pts is zero.
#endif
}
}
}

Loading…
Cancel
Save