diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp
index d61673cd7..602f9ff17 100644
--- a/modules/cudacodec/include/opencv2/cudacodec.hpp
+++ b/modules/cudacodec/include/opencv2/cudacodec.hpp
@@ -310,7 +310,7 @@ enum DeinterlaceMode
 struct CV_EXPORTS_W_SIMPLE FormatInfo
 {
     CV_WRAP FormatInfo() : nBitDepthMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false),
-        fps(0), ulNumDecodeSurfaces(0) {};
+        fps(0), ulNumDecodeSurfaces(0), videoFullRangeFlag(false) {};

     CV_PROP_RW Codec codec;
     CV_PROP_RW ChromaFormat chromaFormat;
@@ -329,6 +329,7 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
     CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
     CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
     CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
+    CV_PROP_RW bool videoFullRangeFlag;//!< Output value indicating whether the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification. Internally the conversion from NV12 to BGR obeys ITU 709.
 };

 /** @brief cv::cudacodec::VideoReader generic properties identifier.
diff --git a/modules/cudacodec/src/cuda/nv12_to_rgb.cu b/modules/cudacodec/src/cuda/nv12_to_rgb.cu
index 744983b04..049041a75 100644
--- a/modules/cudacodec/src/cuda/nv12_to_rgb.cu
+++ b/modules/cudacodec/src/cuda/nv12_to_rgb.cu
@@ -66,14 +66,20 @@ namespace
 {
     __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};

+    template<bool fullRange>
     __device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
     {
         float luma, chromaCb, chromaCr;
-
-        // Prepare for hue adjustment
-        luma = (float)yuvi[0];
-        chromaCb = (float)((int)yuvi[1] - 512.0f);
-        chromaCr = (float)((int)yuvi[2] - 512.0f);
+        if (fullRange) {
+            luma = (float)(((int)yuvi[0] * 219.0f / 255.0f));
+            chromaCb = (float)(((int)yuvi[1] - 512.0f) * 224.0f / 255.0f);
+            chromaCr = (float)(((int)yuvi[2] - 512.0f) * 224.0f / 255.0f);
+        }
+        else {
+            luma = (float)((int)yuvi[0] - 64.0f);
+            chromaCb = (float)((int)yuvi[1] - 512.0f);
+            chromaCr = (float)((int)yuvi[2] - 512.0f);
+        }

         // Convert YUV To RGB with hue adjustment
         *red = (luma * constHueColorSpaceMat[0]) +
@@ -112,6 +118,7 @@ namespace
 #define COLOR_COMPONENT_BIT_SIZE 10
 #define COLOR_COMPONENT_MASK 0x3FF

+    template<bool fullRange>
     __global__ void NV12_to_BGRA(const uchar* srcImage, size_t nSourcePitch,
                                  uint* dstImage, size_t nDestPitch,
                                  uint width, uint height)
@@ -135,31 +142,11 @@ namespace

         const int y_chroma = y >> 1;

-        if (y & 1)  // odd scanline ?
-        {
-            uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
-            uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
-
-            if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
-            {
-                chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
-                chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
-            }
+        yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+        yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

-            yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-            yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-            yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-            yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-        }
-        else
-        {
-            yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-            yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
-            yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-        }
+        yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
+        yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

         // this steps performs the color conversion
         uint yuvi[6];
@@ -174,8 +161,8 @@ namespace
         yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

         // YUV to RGB Transformation conversion
-        YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
-        YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
+        YUV2RGB<fullRange>(&yuvi[0], &red[0], &green[0], &blue[0]);
+        YUV2RGB<fullRange>(&yuvi[3], &red[1], &green[1], &blue[1]);

         // Clamp the results to RGBA
@@ -186,13 +173,15 @@ namespace
     }
 }

-void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream)
+void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const bool videoFullRangeFlag, cudaStream_t stream)
 {
     outFrame.create(height, width, CV_8UC4);
     dim3 block(32, 8);
     dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
-    NV12_to_BGRA<<<grid, block, 0, stream>>>(decodedFrame.ptr<uchar>(), decodedFrame.step,
-        outFrame.ptr<uint>(), outFrame.step, width, height);
+    if (videoFullRangeFlag)
+        NV12_to_BGRA<true><<<grid, block, 0, stream>>>(decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
+    else
+        NV12_to_BGRA<false><<<grid, block, 0, stream>>>(decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
     CV_CUDEV_SAFE_CALL(cudaGetLastError());
     if (stream == 0)
         CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
diff --git a/modules/cudacodec/src/precomp.hpp b/modules/cudacodec/src/precomp.hpp
index 99a788a01..004cf85c8 100644
--- a/modules/cudacodec/src/precomp.hpp
+++ b/modules/cudacodec/src/precomp.hpp
@@ -82,6 +82,7 @@
     #include "frame_queue.hpp"
     #include "video_decoder.hpp"
     #include "video_parser.hpp"
+    #include
 #endif
 #if defined(HAVE_NVCUVENC)
     #include
diff --git a/modules/cudacodec/src/video_parser.cpp b/modules/cudacodec/src/video_parser.cpp
index 8bccd065a..62f70e34b 100644
--- a/modules/cudacodec/src/video_parser.cpp
+++ b/modules/cudacodec/src/video_parser.cpp
@@ -115,6 +115,7 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
         format->min_num_decode_surfaces != thiz->videoDecoder_->nDecodeSurfaces())
     {
         FormatInfo newFormat;
+        newFormat.videoFullRangeFlag = format->video_signal_description.video_full_range_flag;
         newFormat.codec = static_cast<Codec>(format->codec);
         newFormat.chromaFormat = static_cast<ChromaFormat>(format->chroma_format);
         newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
diff --git a/modules/cudacodec/src/video_reader.cpp b/modules/cudacodec/src/video_reader.cpp
index a566bd4de..3d71c3532 100644
--- a/modules/cudacodec/src/video_reader.cpp
+++ b/modules/cudacodec/src/video_reader.cpp
@@ -53,27 +53,54 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, co
 #else // HAVE_NVCUVID

-void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream);
+void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const bool videoFullRangeFlag, cudaStream_t stream);

 bool ValidColorFormat(const ColorFormat colorFormat);

-void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat,
+void cvtFromNv12(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat, const bool videoFullRangeFlag,
     Stream stream)
 {
+    CV_Assert(decodedFrame.cols == width && decodedFrame.rows == height * 1.5f);
     if (colorFormat == ColorFormat::BGRA) {
-        nv12ToBgra(decodedFrame, outFrame, width, height, StreamAccessor::getStream(stream));
+        nv12ToBgra(decodedFrame, outFrame, width, height, videoFullRangeFlag, StreamAccessor::getStream(stream));
     }
     else if (colorFormat == ColorFormat::BGR) {
         outFrame.create(height, width, CV_8UC3);
         Npp8u* pSrc[2] = { decodedFrame.data, &decodedFrame.data[decodedFrame.step * height] };
         NppiSize oSizeROI = { width,height };
+#if (CUDART_VERSION < 9200)
+        CV_Error(Error::StsUnsupportedFormat, "ColorFormat::BGR is not supported before CUDA 9.2, use the default ColorFormat::BGRA.");
+#elif (CUDART_VERSION < 10100)
+        cv::cuda::NppStreamHandler h(stream);
+        if (videoFullRangeFlag)
+            nppSafeCall(nppiNV12ToBGR_709HDTV_8u_P2C3R(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI));
+        else {
+            CV_LOG_DEBUG(NULL, "Color reproduction may be inaccurate due to CUDA version < 11.0, for better results upgrade the CUDA runtime or try ColorFormat::BGRA.");
+            nppSafeCall(nppiNV12ToBGR_8u_P2C3R(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI));
+        }
+#elif (CUDART_VERSION >= 10100)
         NppStreamContext nppStreamCtx;
         nppSafeCall(nppGetStreamContext(&nppStreamCtx));
         nppStreamCtx.hStream = StreamAccessor::getStream(stream);
-        nppSafeCall(nppiNV12ToBGR_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
+        if (videoFullRangeFlag)
+            nppSafeCall(nppiNV12ToBGR_709HDTV_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
+        else {
+#if (CUDART_VERSION < 11000)
+            CV_LOG_DEBUG(NULL, "Color reproduction may be inaccurate due to CUDA version < 11.0, for better results upgrade the CUDA runtime or try ColorFormat::BGRA.");
+            nppSafeCall(nppiNV12ToBGR_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
+#else
+            nppSafeCall(nppiNV12ToBGR_709CSC_8u_P2C3R_Ctx(pSrc, decodedFrame.step, outFrame.data, outFrame.step, oSizeROI, nppStreamCtx));
+#endif
+        }
+#endif
     }
     else if (colorFormat == ColorFormat::GRAY) {
         outFrame.create(height, width, CV_8UC1);
-        cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream));
+        if (videoFullRangeFlag)
+            cudaSafeCall(cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream)));
+        else {
+            cv::cuda::subtract(decodedFrame(Rect(0,0,width,height)), 16, outFrame, noArray(), CV_8U, stream);
+            cv::cuda::multiply(outFrame, 255.0f / 219.0f, outFrame, 1.0, CV_8U, stream);
+        }
     }
     else if (colorFormat == ColorFormat::NV_NV12) {
         decodedFrame.copyTo(outFrame, stream);
@@ -222,9 +249,7 @@ namespace
             // map decoded video frame to CUDA surface
             GpuMat decodedFrame = videoDecoder_->mapFrame(frameInfo.first.picture_index, frameInfo.second);

-            // perform post processing on the CUDA surface (performs colors space conversion and post processing)
-            // comment this out if we include the line of code seen above
-            videoDecPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight(), colorFormat, stream);
+            cvtFromNv12(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight(), colorFormat, videoDecoder_->format().videoFullRangeFlag, stream);

             // unmap video frame
             // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding)
diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp
index b9b4e9f25..1f8ad66eb 100644
--- a/modules/cudacodec/test/test_video.cpp
+++ b/modules/cudacodec/test/test_video.cpp
@@ -62,6 +62,11 @@ PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
 {
 };

+typedef tuple<std::string, bool> color_conversion_params_t;
+PARAM_TEST_CASE(ColorConversion, cv::cuda::DeviceInfo, cv::cudacodec::ColorFormat, color_conversion_params_t)
+{
+};
+
 PARAM_TEST_CASE(VideoReadRaw, cv::cuda::DeviceInfo, std::string)
 {
 };
@@ -220,7 +225,7 @@ CUDA_TEST_P(Scaling, Reader)
     cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
     // assert on mean absolute error due to different resize algorithms
     const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
-    ASSERT_LT(mae, 2.35);
+    ASSERT_LT(mae, 2.75);
 }

 CUDA_TEST_P(Video, Reader)
@@ -265,6 +270,33 @@ CUDA_TEST_P(Video, Reader)
     }
 }

+CUDA_TEST_P(ColorConversion, Reader)
+{
+    cv::cuda::setDevice(GET_PARAM(0).deviceID());
+    const cv::cudacodec::ColorFormat colorFormat = GET_PARAM(1);
+    const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + get<0>(GET_PARAM(2));
+    const bool videoFullRangeFlag = get<1>(GET_PARAM(2));
+    cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
+    reader->set(colorFormat);
+    cv::VideoCapture cap(inputFile);
+
+    cv::cuda::GpuMat frame;
+    Mat frameHost, frameHostGs, frameFromDevice;
+    for (int i = 0; i < 10; i++)
+    {
+        reader->nextFrame(frame);
+        frame.download(frameFromDevice);
+        cap.read(frameHost);
+        const cv::cudacodec::FormatInfo fmt = reader->format();
+        ASSERT_TRUE(fmt.valid && fmt.videoFullRangeFlag == videoFullRangeFlag);
+        if (colorFormat == cv::cudacodec::ColorFormat::BGRA)
+            cv::cvtColor(frameHost, frameHostGs, COLOR_BGR2BGRA);
+        else
+            frameHostGs = frameHost;
+        EXPECT_MAT_NEAR(frameHostGs, frameFromDevice, 2.0);
+    }
+}
+
 CUDA_TEST_P(VideoReadRaw, Reader)
 {
     cv::cuda::setDevice(GET_PARAM(0).deviceID());
@@ -672,6 +704,18 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, Video, testing::Combine(
     ALL_DEVICES,
     testing::Values(VIDEO_SRC_R)));

+const color_conversion_params_t color_conversion_params[] =
+{
+    color_conversion_params_t("highgui/video/big_buck_bunny.h264", false),
+    color_conversion_params_t("highgui/video/big_buck_bunny_full_color_range.h264", true),
+};
+
+#define VIDEO_COLOR_OUTPUTS cv::cudacodec::ColorFormat::BGRA, cv::cudacodec::ColorFormat::BGR
+INSTANTIATE_TEST_CASE_P(CUDA_Codec, ColorConversion, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(VIDEO_COLOR_OUTPUTS),
+    testing::ValuesIn(color_conversion_params)));
+
 #define VIDEO_SRC_RW "highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265"
 INSTANTIATE_TEST_CASE_P(CUDA_Codec, VideoReadRaw, testing::Combine(
     ALL_DEVICES,
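For illustration only, not part of the patch: a minimal sketch of how a caller could read frames and query the new FormatInfo::videoFullRangeFlag added above, using only the VideoReader calls already exercised in the patch's tests. The input file name is a placeholder, not a test asset.

```cpp
// Minimal usage sketch; "input.mp4" is a placeholder path (assumption, not from the patch).
#include <iostream>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudacodec.hpp>

int main()
{
    cv::Ptr<cv::cudacodec::VideoReader> reader =
        cv::cudacodec::createVideoReader(std::string("input.mp4")); // placeholder file
    reader->set(cv::cudacodec::ColorFormat::BGR); // BGR output goes through the NPP full/limited-range paths above

    cv::cuda::GpuMat frame;
    while (reader->nextFrame(frame))
    {
        // Once frames are being decoded, format() reports the parsed bitstream properties,
        // including the video_full_range_flag surfaced by this change.
        const cv::cudacodec::FormatInfo fmt = reader->format();
        std::cout << "full range: " << std::boolalpha << fmt.videoFullRangeFlag << "\n";
        // ... consume frame ...
    }
    return 0;
}
```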