cudacodec: fix nv12 to bgr/bgra conversion

pull/3468/head
cudawarped 2 years ago
parent ed1873bc2c
commit d9967922ce
  1. 3
      modules/cudacodec/include/opencv2/cudacodec.hpp
  2. 45
      modules/cudacodec/src/cuda/nv12_to_rgb.cu
  3. 1
      modules/cudacodec/src/precomp.hpp
  4. 1
      modules/cudacodec/src/video_parser.cpp
  5. 39
      modules/cudacodec/src/video_reader.cpp
  6. 46
      modules/cudacodec/test/test_video.cpp

@ -310,7 +310,7 @@ enum DeinterlaceMode
struct CV_EXPORTS_W_SIMPLE FormatInfo
{
CV_WRAP FormatInfo() : nBitDepthMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false),
fps(0), ulNumDecodeSurfaces(0) {};
fps(0), ulNumDecodeSurfaces(0), videoFullRangeFlag(false) {};
CV_PROP_RW Codec codec;
CV_PROP_RW ChromaFormat chromaFormat;
@ -329,6 +329,7 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
CV_PROP_RW bool videoFullRangeFlag;//!< Output value indicating if the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification. Internally the conversion from NV12 to BGR obeys ITU 709.
};
/** @brief cv::cudacodec::VideoReader generic properties identifier.

@ -66,14 +66,20 @@ namespace
{
__constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
template<bool fullRange>
__device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
{
float luma, chromaCb, chromaCr;
// Prepare for hue adjustment
luma = (float)yuvi[0];
if (fullRange) {
luma = (float)(((int)yuvi[0] * 219.0f / 255.0f));
chromaCb = (float)(((int)yuvi[1] - 512.0f) * 224.0f / 255.0f);
chromaCr = (float)(((int)yuvi[2] - 512.0f) * 224.0f / 255.0f);
}
else {
luma = (float)((int)yuvi[0] - 64.0f);
chromaCb = (float)((int)yuvi[1] - 512.0f);
chromaCr = (float)((int)yuvi[2] - 512.0f);
}
// Convert YUV To RGB with hue adjustment
*red = (luma * constHueColorSpaceMat[0]) +
@ -112,6 +118,7 @@ namespace
#define COLOR_COMPONENT_BIT_SIZE 10
#define COLOR_COMPONENT_MASK 0x3FF
template<bool fullRange>
__global__ void NV12_to_BGRA(const uchar* srcImage, size_t nSourcePitch,
uint* dstImage, size_t nDestPitch,
uint width, uint height)
@ -135,31 +142,11 @@ namespace
const int y_chroma = y >> 1;
if (y & 1) // odd scanline ?
{
uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ];
uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
{
chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1;
chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
}
yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
}
else
{
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
}
// this steps performs the color conversion
uint yuvi[6];
@ -174,8 +161,8 @@ namespace
yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
// YUV to RGB Transformation conversion
YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
YUV2RGB<fullRange>(&yuvi[0], &red[0], &green[0], &blue[0]);
YUV2RGB<fullRange>(&yuvi[3], &red[1], &green[1], &blue[1]);
// Clamp the results to RGBA
@ -186,13 +173,15 @@ namespace
}
}
void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream)
void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const bool videoFullRangeFlag, cudaStream_t stream)
{
outFrame.create(height, width, CV_8UC4);
dim3 block(32, 8);
dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
NV12_to_BGRA<< <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step,
outFrame.ptr<uint>(), outFrame.step, width, height);
if (videoFullRangeFlag)
NV12_to_BGRA<true> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
else
NV12_to_BGRA<false> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
CV_CUDEV_SAFE_CALL(cudaGetLastError());
if (stream == 0)
CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());

@ -82,6 +82,7 @@
#include "frame_queue.hpp"
#include "video_decoder.hpp"
#include "video_parser.hpp"
#include <opencv2/cudaarithm.hpp>
#endif
#if defined(HAVE_NVCUVENC)
#include <fstream>

@ -115,6 +115,7 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
format->min_num_decode_surfaces != thiz->videoDecoder_->nDecodeSurfaces())
{
FormatInfo newFormat;
newFormat.videoFullRangeFlag = format->video_signal_description.video_full_range_flag;
newFormat.codec = static_cast<Codec>(format->codec);
newFormat.chromaFormat = static_cast<ChromaFormat>(format->chroma_format);
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;

@ -53,27 +53,54 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, co
#else // HAVE_NVCUVID
void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream);
void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const bool videoFullRangeFlag, cudaStream_t stream);
bool ValidColorFormat(const ColorFormat colorFormat);
void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat,
void cvtFromNv12(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat, const bool videoFullRangeFlag,
Stream stream)
{
CV_Assert(decodedFrame.cols == width && decodedFrame.rows == height * 1.5f);
if (colorFormat == ColorFormat::BGRA) {
nv12ToBgra(decodedFrame, outFrame, width, height, StreamAccessor::getStream(stream));
nv12ToBgra(decodedFrame, outFrame, width, height, videoFullRangeFlag, StreamAccessor::getStream(stream));
}
else if (colorFormat == ColorFormat::BGR) {
outFrame.create(height, width, CV_8UC3);
Npp8u* pSrc[2] = { decodedFrame.data, &decodedFrame.data[decodedFrame.step * height] };
NppiSize oSizeROI = { width,height };
#if (CUDART_VERSION < 9200)
CV_Error(Error::StsUnsupportedFormat, "ColorFormat::BGR is not supported until CUDA 9.2, use default ColorFormat::BGRA.");
#elif (CUDART_VERSION < 10100)
cv::cuda::NppStreamHandler h(stream);
if (videoFullRangeFlag)
nppSafeCall(nppiNV12ToBGR_709HDTV_8u_P2C3R(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI));
else {
CV_LOG_DEBUG(NULL, "Color reproduction may be inaccurate due CUDA version <= 11.0, for better results upgrade CUDA runtime or try ColorFormat::BGRA.");
nppSafeCall(nppiNV12ToBGR_8u_P2C3R(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI));
}
#elif (CUDART_VERSION >= 10100)
NppStreamContext nppStreamCtx;
nppSafeCall(nppGetStreamContext(&nppStreamCtx));
nppStreamCtx.hStream = StreamAccessor::getStream(stream);
if (videoFullRangeFlag)
nppSafeCall(nppiNV12ToBGR_709HDTV_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
else {
#if (CUDART_VERSION < 11000)
CV_LOG_DEBUG(NULL, "Color reproduction may be inaccurate due CUDA version <= 11.0, for better results upgrade CUDA runtime or try ColorFormat::BGRA.");
nppSafeCall(nppiNV12ToBGR_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
#else
nppSafeCall(nppiNV12ToBGR_709CSC_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
#endif
}
#endif
}
else if (colorFormat == ColorFormat::GRAY) {
outFrame.create(height, width, CV_8UC1);
cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream));
if(videoFullRangeFlag)
cudaSafeCall(cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream)));
else {
cv::cuda::subtract(decodedFrame(Rect(0,0,width,height)), 16, outFrame, noArray(), CV_8U, stream);
cv::cuda::multiply(outFrame, 255.0f / 219.0f, outFrame, 1.0, CV_8U, stream);
}
}
else if (colorFormat == ColorFormat::NV_NV12) {
decodedFrame.copyTo(outFrame, stream);
@ -222,9 +249,7 @@ namespace
// map decoded video frame to CUDA surface
GpuMat decodedFrame = videoDecoder_->mapFrame(frameInfo.first.picture_index, frameInfo.second);
// perform post processing on the CUDA surface (performs colors space conversion and post processing)
// comment this out if we include the line of code seen above
videoDecPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight(), colorFormat, stream);
cvtFromNv12(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight(), colorFormat, videoDecoder_->format().videoFullRangeFlag, stream);
// unmap video frame
// unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding)

@ -62,6 +62,11 @@ PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
{
};
typedef tuple<std::string, bool> color_conversion_params_t;
PARAM_TEST_CASE(ColorConversion, cv::cuda::DeviceInfo, cv::cudacodec::ColorFormat, color_conversion_params_t)
{
};
PARAM_TEST_CASE(VideoReadRaw, cv::cuda::DeviceInfo, std::string)
{
};
@ -220,7 +225,7 @@ CUDA_TEST_P(Scaling, Reader)
cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
// assert on mean absolute error due to different resize algorithms
const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
ASSERT_LT(mae, 2.35);
ASSERT_LT(mae, 2.75);
}
CUDA_TEST_P(Video, Reader)
@ -265,6 +270,33 @@ CUDA_TEST_P(Video, Reader)
}
}
CUDA_TEST_P(ColorConversion, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
const cv::cudacodec::ColorFormat colorFormat = GET_PARAM(1);
const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + get<0>(GET_PARAM(2));
const bool videoFullRangeFlag = get<1>(GET_PARAM(2));
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
reader->set(colorFormat);
cv::VideoCapture cap(inputFile);
cv::cuda::GpuMat frame;
Mat frameHost, frameHostGs, frameFromDevice;
for (int i = 0; i < 10; i++)
{
reader->nextFrame(frame);
frame.download(frameFromDevice);
cap.read(frameHost);
const cv::cudacodec::FormatInfo fmt = reader->format();
ASSERT_TRUE(fmt.valid && fmt.videoFullRangeFlag == videoFullRangeFlag);
if (colorFormat == cv::cudacodec::ColorFormat::BGRA)
cv::cvtColor(frameHost, frameHostGs, COLOR_BGR2BGRA);
else
frameHostGs = frameHost;
EXPECT_MAT_NEAR(frameHostGs, frameFromDevice, 2.0);
}
}
CUDA_TEST_P(VideoReadRaw, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
@ -672,6 +704,18 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, Video, testing::Combine(
ALL_DEVICES,
testing::Values(VIDEO_SRC_R)));
const color_conversion_params_t color_conversion_params[] =
{
color_conversion_params_t("highgui/video/big_buck_bunny.h264", false),
color_conversion_params_t("highgui/video/big_buck_bunny_full_color_range.h264", true),
};
#define VIDEO_COLOR_OUTPUTS cv::cudacodec::ColorFormat::BGRA, cv::cudacodec::ColorFormat::BGRA
INSTANTIATE_TEST_CASE_P(CUDA_Codec, ColorConversion, testing::Combine(
ALL_DEVICES,
testing::Values(VIDEO_COLOR_OUTPUTS),
testing::ValuesIn(color_conversion_params)));
#define VIDEO_SRC_RW "highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265"
INSTANTIATE_TEST_CASE_P(CUDA_Codec, VideoReadRaw, testing::Combine(
ALL_DEVICES,

Loading…
Cancel
Save