Merge pull request #3360 from cudawarped:nvcuvenc

Fix cudacodec::VideoWriter
Alexander Smorkalov, 3 years ago, committed by GitHub
commit 9d84eaed80
  1. modules/cudacodec/CMakeLists.txt (27 changed lines)
  2. modules/cudacodec/include/opencv2/cudacodec.hpp (326 changed lines)
  3. modules/cudacodec/misc/python/pyopencv_cudacodec.hpp (10 changed lines)
  4. modules/cudacodec/misc/python/test/test_cudacodec.py (24 changed lines)
  5. modules/cudacodec/perf/perf_video.cpp (126 changed lines)
  6. modules/cudacodec/src/NvEncoder.cpp (786 changed lines)
  7. modules/cudacodec/src/NvEncoder.h (377 changed lines)
  8. modules/cudacodec/src/NvEncoderCuda.cpp (196 changed lines)
  9. modules/cudacodec/src/NvEncoderCuda.h (75 changed lines)
  10. modules/cudacodec/src/precomp.hpp (50 changed lines)
  11. modules/cudacodec/src/video_reader.cpp (17 changed lines)
  12. modules/cudacodec/src/video_writer.cpp (1109 changed lines)
  13. modules/cudacodec/test/test_video.cpp (275 changed lines)

@@ -6,24 +6,33 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow)
 
-ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python)
+set(required_dependencies opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping)
+if(HAVE_NVCUVENC)
+  list(APPEND required_dependencies opencv_cudaimgproc)
+endif()
+
+ocv_add_module(cudacodec ${required_dependencies} OPTIONAL opencv_cudev WRAP python)
 
 ocv_module_include_directories()
 ocv_glob_module_sources()
 
 set(extra_libs "")
 
-if(HAVE_NVCUVID)
-  list(APPEND extra_libs ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY})
-endif()
-if(HAVE_NVCUVENC)
-  if(WIN32)
-    list(APPEND extra_libs ${CUDA_nvcuvenc_LIBRARY})
-  endif()
-endif()
+if(HAVE_NVCUVID OR HAVE_NVCUVENC)
+  list(APPEND extra_libs ${CUDA_CUDA_LIBRARY})
+  if(HAVE_NVCUVID)
+    list(APPEND extra_libs ${CUDA_nvcuvid_LIBRARY})
+  endif()
+  if(HAVE_NVCUVENC)
+    if(WIN32)
+      list(APPEND extra_libs ${CUDA_nvencodeapi_LIBRARY})
+    else()
+      list(APPEND extra_libs ${CUDA_nvidia-encode_LIBRARY})
+    endif()
+  endif()
+endif()
 
 ocv_create_module(${extra_libs})
 
 ocv_add_accuracy_tests()
 ocv_add_perf_tests()

@@ -66,114 +66,162 @@ using namespace cuda; // Stream
 
 ////////////////////////////////// Video Encoding //////////////////////////////////
 
-// Works only under Windows.
-// Supports only H264 video codec and AVI files.
-enum SurfaceFormat
-{
-    SF_UYVY = 0,
-    SF_YUY2,
-    SF_YV12,
-    SF_NV12,
-    SF_IYUV,
-    SF_BGR,
-    SF_GRAY = SF_BGR
-};
-
-/** @brief Different parameters for CUDA video encoder.
+/** @brief Video codecs supported by cudacodec::VideoReader and cudacodec::VideoWriter.
+@note
+   -  Support will depend on your hardware, refer to the Nvidia Video Codec SDK Video Encode and Decode GPU Support Matrix for details.
 */
-struct CV_EXPORTS_W EncoderParams
+enum Codec
 {
-    int P_Interval;      //!< NVVE_P_INTERVAL,
-    int IDR_Period;      //!< NVVE_IDR_PERIOD,
-    int DynamicGOP;      //!< NVVE_DYNAMIC_GOP,
-    int RCType;          //!< NVVE_RC_TYPE,
-    int AvgBitrate;      //!< NVVE_AVG_BITRATE,
-    int PeakBitrate;     //!< NVVE_PEAK_BITRATE,
-    int QP_Level_Intra;  //!< NVVE_QP_LEVEL_INTRA,
-    int QP_Level_InterP; //!< NVVE_QP_LEVEL_INTER_P,
-    int QP_Level_InterB; //!< NVVE_QP_LEVEL_INTER_B,
-    int DeblockMode;     //!< NVVE_DEBLOCK_MODE,
-    int ProfileLevel;    //!< NVVE_PROFILE_LEVEL,
-    int ForceIntra;      //!< NVVE_FORCE_INTRA,
-    int ForceIDR;        //!< NVVE_FORCE_IDR,
-    int ClearStat;       //!< NVVE_CLEAR_STAT,
-    int DIMode;          //!< NVVE_SET_DEINTERLACE,
-    int Presets;         //!< NVVE_PRESETS,
-    int DisableCabac;    //!< NVVE_DISABLE_CABAC,
-    int NaluFramingType; //!< NVVE_CONFIGURE_NALU_FRAMING_TYPE
-    int DisableSPSPPS;   //!< NVVE_DISABLE_SPS_PPS
-
-    EncoderParams();
-    /** @brief Constructors.
-    @param configFile Config file name.
-    Creates default parameters or reads parameters from config file.
-    */
-    explicit EncoderParams(const String& configFile);
-
-    /** @brief Reads parameters from config file.
-    @param configFile Config file name.
-    */
-    void load(const String& configFile);
-    /** @brief Saves parameters to config file.
-    @param configFile Config file name.
-    */
-    void save(const String& configFile) const;
-};
+    MPEG1 = 0,
+    MPEG2,
+    MPEG4,
+    VC1,
+    H264,
+    JPEG,
+    H264_SVC,
+    H264_MVC,
+    HEVC,
+    VP8,
+    VP9,
+    AV1,
+    NumCodecs,
+    Uncompressed_YUV420 = (('I' << 24) | ('Y' << 16) | ('U' << 8) | ('V')),   //!< Y,U,V (4:2:0)
+    Uncompressed_YV12   = (('Y' << 24) | ('V' << 16) | ('1' << 8) | ('2')),   //!< Y,V,U (4:2:0)
+    Uncompressed_NV12   = (('N' << 24) | ('V' << 16) | ('1' << 8) | ('2')),   //!< Y,UV  (4:2:0)
+    Uncompressed_YUYV   = (('Y' << 24) | ('U' << 16) | ('Y' << 8) | ('V')),   //!< YUYV/YUY2 (4:2:2)
+    Uncompressed_UYVY   = (('U' << 24) | ('Y' << 16) | ('V' << 8) | ('Y'))    //!< UYVY (4:2:2)
+};
+
+/** @brief ColorFormat for the frame returned by VideoReader::nextFrame() and VideoReader::retrieve() or used to initialize a VideoWriter.
+*/
+enum class ColorFormat {
+    UNDEFINED = 0,
+    BGRA = 1, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
+    BGR = 2, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
+    GRAY = 3, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
+    NV_NV12 = 4, //!< Nvidia color format - equivalent to YUV - Semi-Planar YUV [Y plane followed by interleaved UV plane], can be used with both VideoReader and VideoWriter.
+    RGB = 5, //!< OpenCV color format, can only be used with VideoWriter.
+    RGBA = 6, //!< OpenCV color format, can only be used with VideoWriter.
+    NV_YV12 = 8, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by V and U planes], can only be used with VideoWriter.
+    NV_IYUV = 9, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], can only be used with VideoWriter.
+    NV_YUV444 = 10, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], can only be used with VideoWriter.
+    NV_AYUV = 11, //!< Nvidia Buffer Format - 8 bit Packed A8Y8U8V8. This is a word-ordered format where a pixel is represented by a 32-bit word with V in the lowest 8 bits, U in the next 8 bits, Y in the 8 bits after that and A in the highest 8 bits, can only be used with VideoWriter.
+#ifndef CV_DOXYGEN
+    PROP_NOT_SUPPORTED
+#endif
+};
+
+/** @brief Rate Control Modes.
+*/
+enum EncodeParamsRcMode {
+    ENC_PARAMS_RC_CONSTQP = 0x0, //!< Constant QP mode.
+    ENC_PARAMS_RC_VBR = 0x1, //!< Variable bitrate mode.
+    ENC_PARAMS_RC_CBR = 0x2 //!< Constant bitrate mode.
+};
 
-/** @brief Callbacks for CUDA video encoder.
- */
-class CV_EXPORTS_W EncoderCallBack
-{
-public:
-    enum PicType
-    {
-        IFRAME = 1,
-        PFRAME = 2,
-        BFRAME = 3
-    };
-
-    virtual ~EncoderCallBack() {}
-
-    /** @brief Callback function to signal the start of bitstream that is to be encoded.
-
-    Callback must allocate buffer for CUDA encoder and return pointer to it and it's size.
-    */
-    virtual uchar* acquireBitStream(int* bufferSize) = 0;
-
-    /** @brief Callback function to signal that the encoded bitstream is ready to be written to file.
-    */
-    virtual void releaseBitStream(unsigned char* data, int size) = 0;
-
-    /** @brief Callback function to signal that the encoding operation on the frame has started.
-
-    @param frameNumber
-    @param picType Specify frame type (I-Frame, P-Frame or B-Frame).
-    */
-    CV_WRAP virtual void onBeginFrame(int frameNumber, EncoderCallBack::PicType picType) = 0;
-
-    /** @brief Callback function signals that the encoding operation on the frame has finished.
-
-    @param frameNumber
-    @param picType Specify frame type (I-Frame, P-Frame or B-Frame).
-    */
-    CV_WRAP virtual void onEndFrame(int frameNumber, EncoderCallBack::PicType picType) = 0;
-};
+/** @brief Multi Pass Encoding.
+*/
+enum EncodeMultiPass
+{
+    ENC_MULTI_PASS_DISABLED = 0x0, //!< Single Pass.
+    ENC_TWO_PASS_QUARTER_RESOLUTION = 0x1, //!< Two Pass encoding is enabled where first Pass is quarter resolution.
+    ENC_TWO_PASS_FULL_RESOLUTION = 0x2, //!< Two Pass encoding is enabled where first Pass is full resolution.
+};
+
+/** @brief Supported Encoder Profiles.
+*/
+enum EncodeProfile {
+    ENC_CODEC_PROFILE_AUTOSELECT = 0,
+    ENC_H264_PROFILE_BASELINE = 1,
+    ENC_H264_PROFILE_MAIN = 2,
+    ENC_H264_PROFILE_HIGH = 3,
+    ENC_H264_PROFILE_HIGH_444 = 4,
+    ENC_H264_PROFILE_STEREO = 5,
+    ENC_H264_PROFILE_PROGRESSIVE_HIGH = 6,
+    ENC_H264_PROFILE_CONSTRAINED_HIGH = 7,
+    ENC_HEVC_PROFILE_MAIN = 8,
+    ENC_HEVC_PROFILE_MAIN10 = 9,
+    ENC_HEVC_PROFILE_FREXT = 10
+};
+
+/** @brief Nvidia Encoding Presets. Performance degrades and quality improves as we move from P1 to P7.
+*/
+enum EncodePreset {
+    ENC_PRESET_P1 = 1,
+    ENC_PRESET_P2 = 2,
+    ENC_PRESET_P3 = 3,
+    ENC_PRESET_P4 = 4,
+    ENC_PRESET_P5 = 5,
+    ENC_PRESET_P6 = 6,
+    ENC_PRESET_P7 = 7
+};
+
+/** @brief Tuning information.
+*/
+enum EncodeTuningInfo {
+    ENC_TUNING_INFO_UNDEFINED = 0, //!< Undefined tuningInfo. Invalid value for encoding.
+    ENC_TUNING_INFO_HIGH_QUALITY = 1, //!< Tune presets for latency tolerant encoding.
+    ENC_TUNING_INFO_LOW_LATENCY = 2, //!< Tune presets for low latency streaming.
+    ENC_TUNING_INFO_ULTRA_LOW_LATENCY = 3, //!< Tune presets for ultra low latency streaming.
+    ENC_TUNING_INFO_LOSSLESS = 4, //!< Tune presets for lossless encoding.
+    ENC_TUNING_INFO_COUNT
+};
+
+/** Quantization Parameter for each type of frame when using ENC_PARAMS_RC_MODE::ENC_PARAMS_RC_CONSTQP.
+*/
+struct CV_EXPORTS_W_SIMPLE EncodeQp
+{
+    CV_PROP_RW uint32_t qpInterP; //!< Specifies QP value for P-frame.
+    CV_PROP_RW uint32_t qpInterB; //!< Specifies QP value for B-frame.
+    CV_PROP_RW uint32_t qpIntra;  //!< Specifies QP value for Intra Frame.
+};
+
+/** @brief Different parameters for CUDA video encoder.
+*/
+struct CV_EXPORTS_W_SIMPLE EncoderParams
+{
+public:
+    CV_WRAP EncoderParams();
+    CV_PROP_RW EncodePreset nvPreset;
+    CV_PROP_RW EncodeTuningInfo tuningInfo;
+    CV_PROP_RW EncodeProfile encodingProfile;
+    CV_PROP_RW EncodeParamsRcMode rateControlMode;
+    CV_PROP_RW EncodeMultiPass multiPassEncoding;
+    CV_PROP_RW EncodeQp constQp; //!< QP's for ENC_PARAMS_RC_CONSTQP.
+    CV_PROP_RW int averageBitRate; //!< target bitrate for ENC_PARAMS_RC_VBR and ENC_PARAMS_RC_CBR.
+    CV_PROP_RW int maxBitRate; //!< upper bound on bitrate for ENC_PARAMS_RC_VBR and ENC_PARAMS_RC_CONSTQP.
+    CV_PROP_RW uint8_t targetQuality; //!< value 0 - 51 where video quality decreases as targetQuality increases, used with ENC_PARAMS_RC_VBR.
+    CV_PROP_RW int gopLength;
+};
+CV_EXPORTS bool operator==(const EncoderParams& lhs, const EncoderParams& rhs);
 
-/** @brief Video writer interface.
-
-The implementation uses H264 video codec.
-
-@note Currently only Windows platform is supported.
-
- */
+/** @brief Interface for encoder callbacks.
+
+User can implement own multiplexing by implementing this interface.
+*/
+class CV_EXPORTS_W EncoderCallback {
+public:
+    /** @brief Callback function to signal that the encoded bitstream for one or more frames is ready.
+    @param vPacket The raw bitstream for one or more frames.
+    */
+    virtual void onEncoded(std::vector<std::vector<uint8_t>> vPacket) = 0;
+
+    /** @brief Callback function to signal that the encoding has finished.
+    */
+    virtual void onEncodingFinished() = 0;
+
+    virtual ~EncoderCallback() {}
+};
+
+/** @brief Video writer interface.
+@note
+   -  An example on how to use the videoWriter class can be found at
+      opencv_source_code/samples/gpu/video_writer.cpp
+*/
 class CV_EXPORTS_W VideoWriter
 {
 public:
@@ -181,90 +229,51 @@ public:
     /** @brief Writes the next video frame.
 
-    @param frame The written frame.
-    @param lastFrame Indicates that it is end of stream. The parameter can be ignored.
+    @param frame The frame to be written.
 
-    The method write the specified image to video file. The image must have the same size and the same
+    The method encodes the specified image to a video stream. The image must have the same size and the same
     surface format as has been specified when opening the video writer.
     */
-    CV_WRAP virtual void write(InputArray frame, bool lastFrame = false) = 0;
+    CV_WRAP virtual void write(InputArray frame) = 0;
+
+    /** @brief Retrieve the encoding parameters.
+    */
+    CV_WRAP virtual EncoderParams getEncoderParams() const = 0;
+
+    /** @brief Waits until the encoding process has finished before calling EncoderCallback::onEncodingFinished().
+    */
+    CV_WRAP virtual void release() = 0;
 };
 
 /** @brief Creates video writer.
 
-@param fileName Name of the output video file. Only AVI file format is supported.
+@param fileName Name of the output video file. Only raw h264 or hevc files are supported.
 @param frameSize Size of the input video frames.
+@param codec Codec.
 @param fps Framerate of the created video stream.
-@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
-SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
-encoding, frames with other formats will be used as is.
-
-The constructors initialize video writer. FFMPEG is used to write videos. User can implement own
-multiplexing with cudacodec::EncoderCallBack .
-*/
-CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-/** @overload
-@param fileName Name of the output video file. Only AVI file format is supported.
-@param frameSize Size of the input video frames.
-@param fps Framerate of the created video stream.
-@param params Encoder parameters. See cudacodec::EncoderParams .
-@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
-SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
-encoding, frames with other formats will be used as is.
-*/
-CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
-/** @overload
-@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you
-want to work with raw video stream.
-@param frameSize Size of the input video frames.
-@param fps Framerate of the created video stream.
-@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
-SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
-encoding, frames with other formats will be used as is.
-*/
-CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR);
-/** @overload
-@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you
-want to work with raw video stream.
-@param frameSize Size of the input video frames.
-@param fps Framerate of the created video stream.
-@param params Encoder parameters. See cudacodec::EncoderParams.
-@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
-SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
-encoding, frames with other formats will be used as is.
-*/
-CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+@param colorFormat OpenCV color format of the frames to be encoded.
+@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallback. Required for working with the encoded video stream.
+@param stream Stream for frame pre-processing.
+*/
+CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const String& fileName, const Size frameSize, const Codec codec = Codec::H264, const double fps = 25.0,
+    const ColorFormat colorFormat = ColorFormat::BGR, Ptr<EncoderCallback> encoderCallback = 0, const Stream& stream = Stream::Null());
+
+/** @brief Creates video writer.
+
+@param fileName Name of the output video file. Only raw h264 or hevc files are supported.
+@param frameSize Size of the input video frames.
+@param codec Codec.
+@param fps Framerate of the created video stream.
+@param colorFormat OpenCV color format of the frames to be encoded.
+@param params Additional encoding parameters.
+@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallback. Required for working with the encoded video stream.
+@param stream Stream for frame pre-processing.
+*/
+CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const String& fileName, const Size frameSize, const Codec codec, const double fps, const ColorFormat colorFormat,
+    const EncoderParams& params, Ptr<EncoderCallback> encoderCallback = 0, const Stream& stream = Stream::Null());
 
 ////////////////////////////////// Video Decoding //////////////////////////////////////////
 
-/** @brief Video codecs supported by cudacodec::VideoReader .
- */
-enum Codec
-{
-    MPEG1 = 0,
-    MPEG2,
-    MPEG4,
-    VC1,
-    H264,
-    JPEG,
-    H264_SVC,
-    H264_MVC,
-    HEVC,
-    VP8,
-    VP9,
-    AV1,
-    NumCodecs,
-    Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   //!< Y,U,V (4:2:0)
-    Uncompressed_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   //!< Y,V,U (4:2:0)
-    Uncompressed_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   //!< Y,UV  (4:2:0)
-    Uncompressed_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   //!< YUYV/YUY2 (4:2:2)
-    Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    //!< UYVY (4:2:2)
-};
 
 /** @brief Chroma formats supported by cudacodec::VideoReader.
  */
 enum ChromaFormat
@@ -331,18 +340,6 @@ enum class VideoReaderProps {
 #endif
 };
 
-/** @brief ColorFormat for the frame returned by nextFrame()/retrieve().
-*/
-enum class ColorFormat {
-    BGRA = 1,
-    BGR = 2,
-    GRAY = 3,
-    YUV = 4,
-#ifndef CV_DOXYGEN
-    PROP_NOT_SUPPORTED
-#endif
-};
 
 /** @brief Video reader interface.
 @note
@@ -438,8 +435,9 @@ public:
     /** @brief Set the desired ColorFormat for the frame returned by nextFrame()/retrieve().
 
     @param colorFormat Value of the ColorFormat.
+    @return `true` unless the colorFormat is not supported.
     */
-    CV_WRAP virtual void set(const ColorFormat colorFormat) = 0;
+    CV_WRAP virtual bool set(const ColorFormat colorFormat) = 0;
 
     /** @brief Returns the specified VideoReader property
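
To see how the pieces above fit together, here is a minimal usage sketch (not part of the diff, similar in spirit to the new Python test further down): it encodes a batch of BGR GpuMat frames to a raw H264 elementary stream with the second createVideoWriter() overload, then reads the file back and exercises the new bool return of VideoReader::set(). The frame vector and the "out.h264" file name are illustrative assumptions.

    #include <opencv2/cudacodec.hpp>
    #include <iostream>

    void writeThenRead(const std::vector<cv::cuda::GpuMat>& frames /* CV_8UC3, equal sizes */)
    {
        CV_Assert(!frames.empty());
        cv::cudacodec::EncoderParams params;
        params.rateControlMode = cv::cudacodec::ENC_PARAMS_RC_CONSTQP;
        params.constQp = { 28, 31, 25 };  // QP for P, B and I frames respectively
        params.gopLength = 10;
        cv::Ptr<cv::cudacodec::VideoWriter> writer = cv::cudacodec::createVideoWriter(
            "out.h264", frames[0].size(), cv::cudacodec::Codec::H264, 25.0,
            cv::cudacodec::ColorFormat::BGR, params);
        for (const auto& frame : frames)
            writer->write(frame);          // frames are queued to NVENC
        writer->release();                 // flush the encoder before reading the file back

        cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader("out.h264");
        if (!reader->set(cv::cudacodec::ColorFormat::GRAY))   // new bool return value
            std::cout << "GRAY not supported by this reader, keeping the previous format\n";
        cv::cuda::GpuMat frame;
        while (reader->nextFrame(frame))
            ; // process decoded frames here
    }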

@ -1,10 +0,0 @@
#ifdef HAVE_OPENCV_CUDACODEC
#include "opencv2/cudacodec.hpp"
typedef cudacodec::EncoderCallBack::PicType EncoderCallBack_PicType;
CV_PY_TO_CLASS(cudacodec::EncoderParams);
CV_PY_FROM_CLASS(cudacodec::EncoderParams);
#endif

@ -2,7 +2,7 @@
import os
import cv2 as cv
import numpy as np
import tempfile
from tests_common import NewOpenCVTests, unittest
class cudacodec_test(NewOpenCVTests):
@@ -79,12 +79,28 @@ class cudacodec_test(NewOpenCVTests):
         #Test at least the existence of wrapped functions for now
         try:
-            _writer = cv.cudacodec.createVideoWriter("tmp", (128, 128), 30)
+            fd, fname = tempfile.mkstemp(suffix=".h264")
+            os.close(fd)
+            encoder_params_in = cv.cudacodec.EncoderParams()
+            encoder_params_in.gopLength = 10
+            stream = cv.cuda.Stream()
+            sz = (1920,1080)
+            writer = cv.cudacodec.createVideoWriter(fname, sz, cv.cudacodec.H264, 30, cv.cudacodec.ColorFormat_BGR,
+                                                    encoder_params_in, stream=stream)
+            blankFrameIn = cv.cuda.GpuMat(sz,cv.CV_8UC3)
+            writer.write(blankFrameIn)
+            writer.release()
+            encoder_params_out = writer.getEncoderParams()
+            self.assert_true(encoder_params_in.gopLength == encoder_params_out.gopLength)
+            cap = cv.VideoCapture(fname,cv.CAP_FFMPEG)
+            self.assert_true(cap.isOpened())
+            ret, blankFrameOut = cap.read()
+            self.assert_true(ret and blankFrameOut.shape == blankFrameIn.download().shape)
         except cv.error as e:
             self.assertEqual(e.code, cv.Error.StsNotImplemented)
-            self.skipTest("NVCUVENC is not installed")
-        self.assertTrue(True) #It is sufficient that no exceptions have been there
+            self.skipTest("Either NVCUVENC or a GPU hardware encoder is missing or the encoding profile is not supported.")
+        os.remove(fname)
 
 if __name__ == '__main__':
     NewOpenCVTests.bootstrap()

@@ -45,7 +45,7 @@
 
 namespace opencv_test { namespace {
 
-#if defined(HAVE_NVCUVID)
+#if defined(HAVE_NVCUVID) || defined(HAVE_NVCUVENC)
 
 #if defined(HAVE_FFMPEG_WRAPPER) // should this be set in preprocessor or in cvconfig.h
 #define VIDEO_SRC Values("cv/video/768x576.avi", "cv/video/1920x1080.avi")
@@ -54,6 +54,8 @@ namespace opencv_test { namespace {
 #define VIDEO_SRC Values( "cv/video/1920x1080.avi")
 #endif
 
+#if defined(HAVE_NVCUVID)
+
 DEF_PARAM_TEST_1(FileName, string);
 
 //////////////////////////////////////////////////////
@@ -93,63 +95,97 @@ PERF_TEST_P(FileName, VideoReader, VIDEO_SRC)
 
 //////////////////////////////////////////////////////
 // VideoWriter
 
-#if defined(HAVE_NVCUVID) && defined(_WIN32)
+#endif
+
+#if defined(HAVE_NVCUVENC)
 
-PERF_TEST_P(FileName, VideoWriter, VIDEO_SRC)
-{
-    declare.time(30);
-
-    const string inputFile = perf::TestBase::getDataPath(GetParam());
-    const string outputFile = cv::tempfile(".avi");
-
-    const double FPS = 25.0;
-
-    cv::VideoCapture reader(inputFile);
-    ASSERT_TRUE( reader.isOpened() );
-
-    cv::Mat frame;
-
-    if (PERF_RUN_CUDA())
-    {
-        cv::Ptr<cv::cudacodec::VideoWriter> d_writer;
-
-        cv::cuda::GpuMat d_frame;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            reader >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            d_frame.upload(frame);
-
-            if (d_writer.empty())
-                d_writer = cv::cudacodec::createVideoWriter(outputFile, frame.size(), FPS);
-
-            startTimer(); next();
-            d_writer->write(d_frame);
-            stopTimer();
-        }
-    }
-    else
-    {
-        cv::VideoWriter writer;
-
-        for (int i = 0; i < 10; ++i)
-        {
-            reader >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (!writer.isOpened())
-                writer.open(outputFile, VideoWriter::fourcc('X', 'V', 'I', 'D'), FPS, frame.size());
-
-            startTimer(); next();
-            writer.write(frame);
-            stopTimer();
-        }
-    }
-
-    SANITY_CHECK(frame);
-}
+DEF_PARAM_TEST(WriteToFile, string, cv::cudacodec::ColorFormat, cv::cudacodec::Codec);
+
+#define COLOR_FORMAT Values(cv::cudacodec::ColorFormat::BGR, cv::cudacodec::ColorFormat::RGB, cv::cudacodec::ColorFormat::BGRA, \
+    cv::cudacodec::ColorFormat::RGBA, cv::cudacodec::ColorFormat::GRAY)
+
+#define CODEC Values(cv::cudacodec::Codec::H264, cv::cudacodec::Codec::HEVC)
+
+PERF_TEST_P(WriteToFile, VideoWriter, Combine(VIDEO_SRC, COLOR_FORMAT, CODEC))
+{
+    declare.time(30);
+
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const cv::cudacodec::ColorFormat surfaceFormat = GET_PARAM(1);
+    const cudacodec::Codec codec = GET_PARAM(2);
+    const double fps = 25;
+    const int nFrames = 20;
+
+    cv::VideoCapture reader(inputFile);
+    ASSERT_TRUE(reader.isOpened());
+
+    Mat frameBgr;
+
+    if (PERF_RUN_CUDA()) {
+        const std::string ext = codec == cudacodec::Codec::H264 ? ".h264" : ".hevc";
+        const string outputFile = cv::tempfile(ext.c_str());
+        std::vector<GpuMat> frames;
+        cv::Mat frameNewSf;
+        cuda::Stream stream;
+        ColorConversionCodes conversionCode = COLOR_COLORCVT_MAX;
+        switch (surfaceFormat) {
+        case cudacodec::ColorFormat::RGB:
+            conversionCode = COLOR_BGR2RGB;
+            break;
+        case cudacodec::ColorFormat::BGRA:
+            conversionCode = COLOR_BGR2BGRA;
+            break;
+        case cudacodec::ColorFormat::RGBA:
+            conversionCode = COLOR_BGR2RGBA;
+            break;
+        case cudacodec::ColorFormat::GRAY:
+            conversionCode = COLOR_BGR2GRAY;
+        default:
+            break;
+        }
+        for (int i = 0; i < nFrames; i++) {
+            reader >> frameBgr;
+            ASSERT_FALSE(frameBgr.empty());
+            if (conversionCode == COLOR_COLORCVT_MAX)
+                frameNewSf = frameBgr;
+            else
+                cv::cvtColor(frameBgr, frameNewSf, conversionCode);
+            GpuMat frame; frame.upload(frameNewSf, stream);
+            frames.push_back(frame);
+        }
+        stream.waitForCompletion();
+        cv::Ptr<cv::cudacodec::VideoWriter> d_writer = cv::cudacodec::createVideoWriter(outputFile, frameBgr.size(), codec, fps, surfaceFormat, 0, stream);
+        for (int i = 0; i < nFrames - 1; ++i) {
+            startTimer();
+            d_writer->write(frames[i]);
+            stopTimer();
+        }
+        startTimer();
+        d_writer->write(frames[nFrames - 1]);
+        d_writer->release();
+        stopTimer();
+        ASSERT_EQ(0, remove(outputFile.c_str()));
+    }
+    else {
+        if (surfaceFormat != cv::cudacodec::ColorFormat::BGR || codec != cv::cudacodec::Codec::H264)
+            throw PerfSkipTestException();
+        cv::VideoWriter writer;
+        const string outputFile = cv::tempfile(".avi");
+        for (int i = 0; i < nFrames-1; ++i) {
+            reader >> frameBgr;
+            ASSERT_FALSE(frameBgr.empty());
+            if (!writer.isOpened())
+                writer.open(outputFile, VideoWriter::fourcc('X', 'V', 'I', 'D'), fps, frameBgr.size());
+            startTimer();
+            writer.write(frameBgr);
+            stopTimer();
+        }
+        reader >> frameBgr;
+        ASSERT_FALSE(frameBgr.empty());
+        startTimer();
+        writer.write(frameBgr);
+        writer.release();
+        stopTimer();
+        ASSERT_EQ(0, remove(outputFile.c_str()));
+    }
+
+    SANITY_CHECK(frameBgr);
+}
 
 #endif
 #endif
 
 }} // namespace

@@ -0,0 +1,786 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#if defined(HAVE_NVCUVENC)
#include "NvEncoder.h"
namespace cv { namespace cudacodec {
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID& guid1, const GUID& guid2) {
return !memcmp(&guid1, &guid2, sizeof(GUID));
}
static inline bool operator!=(const GUID& guid1, const GUID& guid2) {
return !(guid1 == guid2);
}
#endif
NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay) :
m_hEncoder(nullptr),
m_pDevice(pDevice),
m_eDeviceType(eDeviceType),
m_nWidth(nWidth),
m_nHeight(nHeight),
m_nMaxEncodeWidth(nWidth),
m_nMaxEncodeHeight(nHeight),
m_eBufferFormat(eBufferFormat),
m_nExtraOutputDelay(nExtraOutputDelay)
{
LoadNvEncApi();
if (!m_nvenc.nvEncOpenEncodeSession)
{
m_nEncoderBuffer = 0;
NVENC_THROW_ERROR("EncodeAPI not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
}
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams = {};
encodeSessionExParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
encodeSessionExParams.device = m_pDevice;
encodeSessionExParams.deviceType = m_eDeviceType;
encodeSessionExParams.apiVersion = NVENCAPI_VERSION;
void* hEncoder = NULL;
NVENC_API_CALL(m_nvenc.nvEncOpenEncodeSessionEx(&encodeSessionExParams, &hEncoder));
m_hEncoder = hEncoder;
}
void NvEncoder::LoadNvEncApi()
{
uint32_t version = 0;
uint32_t currentVersion = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
if (currentVersion > version)
{
NVENC_THROW_ERROR("Current Driver Version does not support this NvEncodeAPI version, please upgrade driver", NV_ENC_ERR_INVALID_VERSION);
}
m_nvenc = {};
m_nvenc.version = NV_ENCODE_API_FUNCTION_LIST_VER;
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
}
NvEncoder::~NvEncoder()
{
DestroyHWEncoder();
}
void NvEncoder::CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo)
{
if (!m_hEncoder)
{
NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_NO_ENCODE_DEVICE);
return;
}
if (pIntializeParams == nullptr || pIntializeParams->encodeConfig == nullptr)
{
NVENC_THROW_ERROR("pInitializeParams and pInitializeParams->encodeConfig can't be NULL", NV_ENC_ERR_INVALID_PTR);
}
memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
auto pEncodeConfig = pIntializeParams->encodeConfig;
memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
pIntializeParams->encodeConfig = pEncodeConfig;
pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER;
pIntializeParams->version = NV_ENC_INITIALIZE_PARAMS_VER;
pIntializeParams->encodeGUID = codecGuid;
pIntializeParams->presetGUID = presetGuid;
pIntializeParams->encodeWidth = m_nWidth;
pIntializeParams->encodeHeight = m_nHeight;
pIntializeParams->darWidth = m_nWidth;
pIntializeParams->darHeight = m_nHeight;
pIntializeParams->frameRateNum = 30;
pIntializeParams->frameRateDen = 1;
pIntializeParams->enablePTD = 1;
pIntializeParams->reportSliceOffsets = 0;
pIntializeParams->enableSubFrameWrite = 0;
pIntializeParams->maxEncodeWidth = m_nWidth;
pIntializeParams->maxEncodeHeight = m_nHeight;
pIntializeParams->enableMEOnlyMode = false;
pIntializeParams->enableOutputInVidmem = false;
#if defined(_WIN32)
pIntializeParams->enableEncodeAsync = GetCapabilityValue(codecGuid, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT);
#endif
pIntializeParams->tuningInfo = tuningInfo;
NV_ENC_PRESET_CONFIG presetConfig = {};
presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid, tuningInfo, &presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID)
{
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC = 3;
}
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod = pIntializeParams->encodeConfig->gopLength;
}
else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID)
{
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) ? 2 : 0;
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod = pIntializeParams->encodeConfig->gopLength;
}
return;
}
void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncoderParams)
{
if (!m_hEncoder)
{
NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_NO_ENCODE_DEVICE);
}
if (!pEncoderParams)
{
NVENC_THROW_ERROR("Invalid NV_ENC_INITIALIZE_PARAMS ptr", NV_ENC_ERR_INVALID_PTR);
}
if (pEncoderParams->encodeWidth == 0 || pEncoderParams->encodeHeight == 0)
{
NVENC_THROW_ERROR("Invalid encoder width and height", NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID && pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID)
{
NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID)
{
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
NVENC_THROW_ERROR("10-bit format isn't supported by H264 encoder", NV_ENC_ERR_INVALID_PARAM);
}
}
// set other necessary params if not set yet
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID)
{
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) &&
(pEncoderParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC != 3))
{
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID)
{
bool yuv10BitFormat = (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) ? true : false;
if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 != 2)
{
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) &&
(pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC != 3))
{
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
}
memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams));
m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
if (pEncoderParams->encodeConfig)
{
memcpy(&m_encodeConfig, pEncoderParams->encodeConfig, sizeof(m_encodeConfig));
m_encodeConfig.version = NV_ENC_CONFIG_VER;
}
else
{
NV_ENC_PRESET_CONFIG presetConfig = {};
presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID, pEncoderParams->tuningInfo, &presetConfig);
memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
}
m_initializeParams.encodeConfig = &m_encodeConfig;
NVENC_API_CALL(m_nvenc.nvEncInitializeEncoder(m_hEncoder, &m_initializeParams));
m_bEncoderInitialized = true;
m_nWidth = m_initializeParams.encodeWidth;
m_nHeight = m_initializeParams.encodeHeight;
m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
m_nEncoderBuffer = m_encodeConfig.frameIntervalP + m_encodeConfig.rcParams.lookaheadDepth + m_nExtraOutputDelay;
m_nOutputDelay = m_nEncoderBuffer - 1;
m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr);
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++)
{
m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
NV_ENC_EVENT_PARAMS eventParams = { NV_ENC_EVENT_PARAMS_VER };
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams);
}
#endif
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr);
InitializeBitstreamBuffer();
AllocateInputBuffers(m_nEncoderBuffer);
}
void NvEncoder::DestroyEncoder()
{
if (!m_hEncoder)
{
return;
}
ReleaseInputBuffers();
DestroyHWEncoder();
}
void NvEncoder::DestroyHWEncoder()
{
if (!m_hEncoder)
{
return;
}
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++)
{
if (m_vpCompletionEvent[i])
{
NV_ENC_EVENT_PARAMS eventParams = { NV_ENC_EVENT_PARAMS_VER };
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams);
CloseHandle(m_vpCompletionEvent[i]);
}
}
m_vpCompletionEvent.clear();
#endif
DestroyBitstreamBuffer();
m_nvenc.nvEncDestroyEncoder(m_hEncoder);
m_hEncoder = nullptr;
m_bEncoderInitialized = false;
}
const NvEncInputFrame* NvEncoder::GetNextInputFrame()
{
int i = m_iToSend % m_nEncoderBuffer;
return &m_vInputFrames[i];
}
void NvEncoder::MapResources(uint32_t bfrIdx)
{
NV_ENC_MAP_INPUT_RESOURCE mapInputResource = {};
mapInputResource.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
mapInputResource.registeredResource = m_vRegisteredResources[bfrIdx];
NVENC_API_CALL(m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource));
m_vMappedInputBuffers[bfrIdx] = mapInputResource.mappedResource;
}
void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, NV_ENC_PIC_PARAMS* pPicParams)
{
vPacket.clear();
if (!IsHWEncoderInitialized())
{
NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
}
int bfrIdx = m_iToSend % m_nEncoderBuffer;
MapResources(bfrIdx);
NVENCSTATUS nvStatus = DoEncode(m_vMappedInputBuffers[bfrIdx], m_vBitstreamOutputBuffer[bfrIdx], pPicParams);
if (nvStatus == NV_ENC_SUCCESS || nvStatus == NV_ENC_ERR_NEED_MORE_INPUT)
{
m_iToSend++;
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true);
}
else
{
NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus);
}
}
void NvEncoder::GetSequenceParams(std::vector<uint8_t>& seqParams)
{
uint8_t spsppsData[1024]; // Assume maximum spspps data is 1KB or less
memset(spsppsData, 0, sizeof(spsppsData));
NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {};
payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
uint32_t spsppsSize = 0;
payload.spsppsBuffer = spsppsData;
payload.inBufferSize = sizeof(spsppsData);
payload.outSPSPPSPayloadSize = &spsppsSize;
NVENC_API_CALL(m_nvenc.nvEncGetSequenceParams(m_hEncoder, &payload));
seqParams.clear();
seqParams.insert(seqParams.end(), &spsppsData[0], &spsppsData[spsppsSize]);
}
NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS* pPicParams)
{
NV_ENC_PIC_PARAMS picParams = {};
if (pPicParams)
{
picParams = *pPicParams;
}
picParams.version = NV_ENC_PIC_PARAMS_VER;
picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
picParams.inputBuffer = inputBuffer;
picParams.bufferFmt = GetPixelFormat();
picParams.inputWidth = GetEncodeWidth();
picParams.inputHeight = GetEncodeHeight();
picParams.outputBitstream = outputBuffer;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams);
return nvStatus;
}
void NvEncoder::SendEOS()
{
NV_ENC_PIC_PARAMS picParams = {};
picParams.version = NV_ENC_PIC_PARAMS_VER;
picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams));
}
void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>>& vPacket)
{
vPacket.clear();
if (!IsHWEncoderInitialized())
{
NVENC_THROW_ERROR("Encoder device not initialized", NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
}
SendEOS();
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, false);
}
void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, bool bOutputDelay)
{
unsigned i = 0;
int iEnd = bOutputDelay ? m_iToSend - m_nOutputDelay : m_iToSend;
for (; m_iGot < iEnd; m_iGot++)
{
WaitForCompletionEvent(m_iGot % m_nEncoderBuffer);
NV_ENC_LOCK_BITSTREAM lockBitstreamData = {};
lockBitstreamData.version = NV_ENC_LOCK_BITSTREAM_VER;
lockBitstreamData.outputBitstream = vOutputBuffer[m_iGot % m_nEncoderBuffer];
lockBitstreamData.doNotWait = false;
NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData));
uint8_t* pData = (uint8_t*)lockBitstreamData.bitstreamBufferPtr;
if (vPacket.size() < i + 1)
{
vPacket.push_back(std::vector<uint8_t>());
}
vPacket[i].clear();
vPacket[i].insert(vPacket[i].end(), &pData[0], &pData[lockBitstreamData.bitstreamSizeInBytes]);
i++;
NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(m_hEncoder, lockBitstreamData.outputBitstream));
if (m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer])
{
NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]));
m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer] = nullptr;
}
}
}
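// Illustrative note, not from the original source: with bOutputDelay == true, packets
// are drained only up to m_iToSend - m_nOutputDelay, so up to m_nOutputDelay frames
// stay queued inside NVENC; this is why EncodeFrame() can return zero or several
// packets per call. EndEncode() passes bOutputDelay == false after the EOS picture,
// so the loop above then drains every remaining bitstream buffer.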
bool NvEncoder::Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams)
{
NVENC_API_CALL(m_nvenc.nvEncReconfigureEncoder(m_hEncoder, const_cast<NV_ENC_RECONFIGURE_PARAMS*>(pReconfigureParams)));
memcpy(&m_initializeParams, &(pReconfigureParams->reInitEncodeParams), sizeof(m_initializeParams));
if (pReconfigureParams->reInitEncodeParams.encodeConfig)
{
memcpy(&m_encodeConfig, pReconfigureParams->reInitEncodeParams.encodeConfig, sizeof(m_encodeConfig));
}
m_nWidth = m_initializeParams.encodeWidth;
m_nHeight = m_initializeParams.encodeHeight;
m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
return true;
}
NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage,
NV_ENC_FENCE_POINT_D3D12* pInputFencePoint, NV_ENC_FENCE_POINT_D3D12* pOutputFencePoint)
{
NV_ENC_REGISTER_RESOURCE registerResource = {};
registerResource.version = NV_ENC_REGISTER_RESOURCE_VER;
registerResource.resourceType = eResourceType;
registerResource.resourceToRegister = pBuffer;
registerResource.width = width;
registerResource.height = height;
registerResource.pitch = pitch;
registerResource.bufferFormat = bufferFormat;
registerResource.bufferUsage = bufferUsage;
registerResource.pInputFencePoint = pInputFencePoint;
registerResource.pOutputFencePoint = pOutputFencePoint;
NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, &registerResource));
return registerResource.registeredResource;
}
void NvEncoder::RegisterInputResources(std::vector<void*> inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame)
{
for (uint32_t i = 0; i < inputframes.size(); ++i)
{
NV_ENC_REGISTERED_PTR registeredPtr = RegisterResource(inputframes[i], eResourceType, width, height, pitch, bufferFormat, NV_ENC_INPUT_IMAGE);
std::vector<uint32_t> _chromaOffsets;
NvEncoder::GetChromaSubPlaneOffsets(bufferFormat, pitch, height, _chromaOffsets);
NvEncInputFrame inputframe = {};
inputframe.inputPtr = (void*)inputframes[i];
inputframe.chromaOffsets[0] = 0;
inputframe.chromaOffsets[1] = 0;
for (uint32_t ch = 0; ch < _chromaOffsets.size(); ch++)
{
inputframe.chromaOffsets[ch] = _chromaOffsets[ch];
}
inputframe.numChromaPlanes = NvEncoder::GetNumChromaPlanes(bufferFormat);
inputframe.pitch = pitch;
inputframe.chromaPitch = NvEncoder::GetChromaPitch(bufferFormat, pitch);
inputframe.bufferFormat = bufferFormat;
inputframe.resourceType = eResourceType;
if (bReferenceFrame)
{
m_vRegisteredResourcesForReference.push_back(registeredPtr);
m_vReferenceFrames.push_back(inputframe);
}
else
{
m_vRegisteredResources.push_back(registeredPtr);
m_vInputFrames.push_back(inputframe);
}
}
}
void NvEncoder::FlushEncoder()
{
try
{
std::vector<std::vector<uint8_t>> vPacket;
EndEncode(vPacket);
}
catch (...)
{
}
}
void NvEncoder::UnregisterInputResources()
{
FlushEncoder();
m_vMappedRefBuffers.clear();
for (uint32_t i = 0; i < m_vMappedInputBuffers.size(); ++i)
{
if (m_vMappedInputBuffers[i])
{
m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[i]);
}
}
m_vMappedInputBuffers.clear();
for (uint32_t i = 0; i < m_vRegisteredResources.size(); ++i)
{
if (m_vRegisteredResources[i])
{
m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResources[i]);
}
}
m_vRegisteredResources.clear();
for (uint32_t i = 0; i < m_vRegisteredResourcesForReference.size(); ++i)
{
if (m_vRegisteredResourcesForReference[i])
{
m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResourcesForReference[i]);
}
}
m_vRegisteredResourcesForReference.clear();
}
void NvEncoder::WaitForCompletionEvent(int iEvent)
{
#if defined(_WIN32)
// Check if we are in async mode. If not, don't wait for the event.
NV_ENC_CONFIG sEncodeConfig = { 0 };
NV_ENC_INITIALIZE_PARAMS sInitializeParams = { 0 };
sInitializeParams.encodeConfig = &sEncodeConfig;
GetInitializeParams(&sInitializeParams);
if (0U == sInitializeParams.enableEncodeAsync)
{
return;
}
#ifdef DEBUG
WaitForSingleObject(m_vpCompletionEvent[iEvent], INFINITE);
#else
// wait for 20s, which is effectively infinite in terms of gpu time
if (WaitForSingleObject(m_vpCompletionEvent[iEvent], 20000) == WAIT_FAILED)
{
NVENC_THROW_ERROR("Failed to encode frame", NV_ENC_ERR_GENERIC);
}
#endif
#endif
}
uint32_t NvEncoder::GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width)
{
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_YUV444:
return width;
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return width * 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return width * 4;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
uint32_t NvEncoder::GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat)
{
switch (bufferFormat)
{
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 1;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
uint32_t NvEncoder::GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch)
{
switch (bufferFormat)
{
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return lumaPitch;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
return (lumaPitch + 1) / 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
void NvEncoder::GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch, const uint32_t height, std::vector<uint32_t>& chromaOffsets)
{
chromaOffsets.clear();
switch (bufferFormat)
{
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
chromaOffsets.push_back(pitch * height);
return;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
chromaOffsets.push_back(pitch * height);
chromaOffsets.push_back(chromaOffsets[0] + (NvEncoder::GetChromaPitch(bufferFormat, pitch) * GetChromaHeight(bufferFormat, height)));
return;
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
chromaOffsets.push_back(pitch * height);
chromaOffsets.push_back(chromaOffsets[0] + (pitch * height));
return;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
uint32_t NvEncoder::GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight)
{
switch (bufferFormat)
{
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return (lumaHeight + 1) / 2;
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return lumaHeight;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
uint32_t NvEncoder::GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth)
{
switch (bufferFormat)
{
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
return (lumaWidth + 1) / 2;
case NV_ENC_BUFFER_FORMAT_NV12:
return lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 2 * lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV444:
return lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2 * lumaWidth;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
int NvEncoder::GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery)
{
if (!m_hEncoder)
{
return 0;
}
NV_ENC_CAPS_PARAM capsParam = {};
capsParam.version = NV_ENC_CAPS_PARAM_VER;
capsParam.capsToQuery = capsToQuery;
int v;
m_nvenc.nvEncGetEncodeCaps(m_hEncoder, guidCodec, &capsParam, &v);
return v;
}
int NvEncoder::GetFrameSize() const
{
switch (GetPixelFormat())
{
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_NV12:
return GetEncodeWidth() * (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2);
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 2 * GetEncodeWidth() * (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2);
case NV_ENC_BUFFER_FORMAT_YUV444:
return GetEncodeWidth() * GetEncodeHeight() * 3;
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2 * GetEncodeWidth() * GetEncodeHeight() * 3;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 4 * GetEncodeWidth() * GetEncodeHeight();
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
}
}
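// Worked example (illustrative note, not from the original source): for a 1920x1080
// NV12 frame, GetFrameSize() = 1920 * (1080 + (1080 + 1) / 2) = 1920 * 1620
// = 3110400 bytes, i.e. a full-resolution luma plane plus a half-height interleaved
// UV plane of the same width. Note this uses the unpadded width; actual device
// allocations are laid out using the pitch instead.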
void NvEncoder::GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams)
{
if (!pInitializeParams || !pInitializeParams->encodeConfig)
{
NVENC_THROW_ERROR("Both pInitializeParams and pInitializeParams->encodeConfig can't be NULL", NV_ENC_ERR_INVALID_PTR);
}
NV_ENC_CONFIG* pEncodeConfig = pInitializeParams->encodeConfig;
*pEncodeConfig = m_encodeConfig;
*pInitializeParams = m_initializeParams;
pInitializeParams->encodeConfig = pEncodeConfig;
}
void NvEncoder::InitializeBitstreamBuffer()
{
for (int i = 0; i < m_nEncoderBuffer; i++)
{
NV_ENC_CREATE_BITSTREAM_BUFFER createBitstreamBuffer = {};
createBitstreamBuffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
NVENC_API_CALL(m_nvenc.nvEncCreateBitstreamBuffer(m_hEncoder, &createBitstreamBuffer));
m_vBitstreamOutputBuffer[i] = createBitstreamBuffer.bitstreamBuffer;
}
}
void NvEncoder::DestroyBitstreamBuffer()
{
for (uint32_t i = 0; i < m_vBitstreamOutputBuffer.size(); i++)
{
if (m_vBitstreamOutputBuffer[i])
{
m_nvenc.nvEncDestroyBitstreamBuffer(m_hEncoder, m_vBitstreamOutputBuffer[i]);
}
}
m_vBitstreamOutputBuffer.clear();
}
}}
#endif
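
For orientation before the header below: a hedged sketch (not part of the PR) of how a derived encoder is typically driven, based only on the public methods shown above and declared in NvEncoder.h. The enc, nFrames and outFile names are illustrative; the concrete derived class in this PR is NvEncoderCuda (see NvEncoderCuda.h), whose device-copy helpers are not shown here.

    #include <fstream>
    #include <vector>
    #include "NvEncoder.h"

    void encodeAll(cv::cudacodec::NvEncoder& enc, int nFrames, std::ofstream& outFile)
    {
        std::vector<std::vector<uint8_t>> vPacket;
        for (int i = 0; i < nFrames; i++) {
            // obtain the next slot in the ring of registered input buffers
            const cv::cudacodec::NvEncInputFrame* inputFrame = enc.GetNextInputFrame();
            // ... copy the uncompressed frame to inputFrame->inputPtr here,
            // honouring inputFrame->pitch and inputFrame->chromaOffsets ...
            enc.EncodeFrame(vPacket);  // may yield 0..n packets because of the output delay
            for (const auto& packet : vPacket)
                outFile.write(reinterpret_cast<const char*>(packet.data()), packet.size());
        }
        enc.EndEncode(vPacket);        // EOS: drain everything still queued
        for (const auto& packet : vPacket)
            outFile.write(reinterpret_cast<const char*>(packet.data()), packet.size());
    }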

@@ -0,0 +1,377 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NVENCODER_HPP
#define OPENCV_NVENCODER_HPP
#include <vector>
#include "nvEncodeAPI.h"
#include <stdint.h>
#include <mutex>
#include <string>
#include <iostream>
#include <sstream>
#include <string.h>
namespace cv { namespace cudacodec {
#define NVENC_THROW_ERROR( errorStr, errorCode ) \
do \
{ \
cv::String msg = cv::format("%s [Code = %d]", errorStr, errorCode); \
cv::error(cv::Error::GpuApiCallError, msg, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
#define NVENC_API_CALL( nvencAPI ) \
do \
{ \
NVENCSTATUS errorCode = nvencAPI; \
if( errorCode != NV_ENC_SUCCESS) \
{ \
cv::String msg = cv::format("NVENC returned error [Code = %d]", errorCode); \
cv::error(cv::Error::GpuApiCallError, msg, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
struct NvEncInputFrame
{
void* inputPtr = nullptr;
uint32_t chromaOffsets[2];
uint32_t numChromaPlanes;
uint32_t pitch;
uint32_t chromaPitch;
NV_ENC_BUFFER_FORMAT bufferFormat;
NV_ENC_INPUT_RESOURCE_TYPE resourceType;
};
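// Illustration (not in the original header): for a 1920x1080 NV12 input with a
// device pitch of 2048 bytes, the static helpers declared below (implemented in
// NvEncoder.cpp above) fill these fields as follows:
//   numChromaPlanes  = GetNumChromaPlanes(NV12)     -> 1 (interleaved UV plane)
//   chromaPitch      = GetChromaPitch(NV12, 2048)   -> 2048
//   chromaOffsets[0] = pitch * height = 2048 * 1080 -> 2211840
// i.e. the UV plane starts immediately after the (padded) luma plane.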
/**
* @brief Shared base class for different encoder interfaces.
*/
class NvEncoder
{
public:
/**
* @brief This function is used to initialize the encoder session.
* Application must call this function to initialize the encoder, before
* starting to encode any frames.
*/
virtual void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
/**
* @brief This function is used to destroy the encoder session.
* Application must call this function to destroy the encoder session and
* clean up any allocated resources. The application must call EndEncode()
* function to get any queued encoded frames before calling DestroyEncoder().
*/
virtual void DestroyEncoder();
/**
* @brief This function is used to reconfigure an existing encoder session.
* Application can use this function to dynamically change the bitrate,
* resolution and other QOS parameters. If the application changes the
* resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR.
*/
bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams);
/**
* @brief This function is used to get the next available input buffer.
* Applications must call this function to obtain a pointer to the next
* input buffer. The application must copy the uncompressed data to the
* input buffer and then call EncodeFrame() function to encode it.
*/
const NvEncInputFrame* GetNextInputFrame();
/**
* @brief This function is used to encode a frame.
* Applications must call EncodeFrame() function to encode the uncompressed
* data, which has been copied to an input buffer obtained from the
* GetNextInputFrame() function.
*/
virtual void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket, NV_ENC_PIC_PARAMS* pPicParams = nullptr);
/**
* @brief This function is used to flush the encoder queue.
* The encoder might be queuing frames for B picture encoding or lookahead;
* the application must call EndEncode() to get all the queued encoded frames
* from the encoder. The application must call this function before destroying
* an encoder session.
*/
virtual void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
/**
* @brief This function is used to query hardware encoder capabilities.
* Applications can call this function to query capabilities like maximum encode
* dimensions, support for lookahead or the ME-only mode etc.
*/
int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery);
/**
* @brief This function is used to get the current device on which encoder is running.
*/
void* GetDevice() const { return m_pDevice; }
/**
* @brief This function is used to get the current device type on which the encoder is running.
*/
NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; }
/**
* @brief This function is used to get the current encode width.
* The encode width can be modified by Reconfigure() function.
*/
int GetEncodeWidth() const { return m_nWidth; }
/**
* @brief This function is used to get the current encode height.
* The encode height can be modified by Reconfigure() function.
*/
int GetEncodeHeight() const { return m_nHeight; }
/**
* @brief This function is used to get the current frame size based on pixel format.
*/
int GetFrameSize() const;
/**
* @brief This function is used to initialize config parameters based on
* given codec and preset guids.
* The application can call this function to get the default configuration
* for a certain preset. The application can either use these parameters
* directly or override them with application-specific settings before
* using them in CreateEncoder() function.
*/
void CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED);
/**
* @brief This function is used to get the current initialization parameters,
* which had been used to configure the encoder session.
* The initialization parameters are modified if the application calls
* Reconfigure() function.
*/
void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams);
/**
* @brief This function is used to get sequence and picture parameter headers.
* Application can call this function after encoder is initialized to get SPS and PPS
* nalus for the current encoder instance. The sequence header data might change when
* application calls Reconfigure() function.
*/
void GetSequenceParams(std::vector<uint8_t>& seqParams);
/**
* @brief NvEncoder class virtual destructor.
*/
virtual ~NvEncoder();
public:
/**
* @brief This is a static function to get chroma offsets for YUV planar formats.
*/
static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch,
const uint32_t height, std::vector<uint32_t>& chromaOffsets);
/**
* @brief This is a static function to get the chroma plane pitch for YUV planar formats.
*/
static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch);
/**
* @brief This is a static function to get the number of chroma planes for YUV planar formats.
*/
static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat);
/**
* @brief This is a static function to get the chroma plane width in bytes for YUV planar formats.
*/
static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth);
/**
* @brief This is a static function to get the chroma plane height for YUV planar formats.
*/
static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight);
/**
* @brief This is a static function to get the width in bytes for the frame.
* For YUV planar format this is the width in bytes of the luma plane.
*/
static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width);
/**
* @brief This function returns the number of allocated buffers.
*/
uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
protected:
/**
* @brief NvEncoder class constructor.
* NvEncoder class constructor cannot be called directly by the application.
*/
NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat, uint32_t nOutputDelay);
/**
* @brief This function is used to check if hardware encoder is properly initialized.
*/
bool IsHWEncoderInitialized() const { return m_hEncoder != NULL && m_bEncoderInitialized; }
/**
* @brief This function is used to register CUDA, D3D or OpenGL input buffers with NvEncodeAPI.
* This is a non-public function and is called by the derived class for allocating
* and registering input buffers.
*/
void RegisterInputResources(std::vector<void*> inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame = false);
/**
* @brief This function is used to unregister resources which had been previously registered for encoding
* using RegisterInputResources() function.
*/
void UnregisterInputResources();
/**
* @brief This function is used to register CUDA, D3D or OpenGL input or output buffers with NvEncodeAPI.
*/
NV_ENC_REGISTERED_PTR RegisterResource(void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE,
NV_ENC_FENCE_POINT_D3D12* pInputFencePoint = NULL, NV_ENC_FENCE_POINT_D3D12* pOutputFencePoint = NULL);
/**
* @brief This function returns maximum width used to open the encoder session.
* All encode input buffers are allocated using maximum dimensions.
*/
uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; }
/**
* @brief This function returns maximum height used to open the encoder session.
* All encode input buffers are allocated using maximum dimensions.
*/
uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; }
/**
* @brief This function returns the completion event.
*/
void* GetCompletionEvent(uint32_t eventIdx) { return (m_vpCompletionEvent.size() == m_nEncoderBuffer) ? m_vpCompletionEvent[eventIdx] : nullptr; }
/**
* @brief This function returns the current pixel format.
*/
NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; }
/**
* @brief This function is used to submit the encode commands to the
* NVENC hardware.
*/
NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS* pPicParams);
/**
* @brief This function is used to submit the encode commands to the
* NVENC hardware in ME-only (motion estimation) mode.
*/
//NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference, NV_ENC_OUTPUT_PTR outputBuffer);
/**
* @brief This function is used to map the input buffers to NvEncodeAPI.
*/
void MapResources(uint32_t bfrIdx);
/**
* @brief This function is used to wait for the completion of an encode command.
*/
void WaitForCompletionEvent(int iEvent);
/**
* @brief This function is used to send EOS to the HW encoder.
*/
void SendEOS();
private:
/**
* @brief This is a private function which is used to check if there is any
* buffering done by the encoder.
* The encoder generally buffers data to encode B frames or for lookahead
* or pipelining.
*/
bool IsZeroDelay() { return m_nOutputDelay == 0; }
/**
* @brief This is a private function which is used to load the encode API shared library.
*/
void LoadNvEncApi();
/**
* @brief This is a private function which is used to get the output packets
* from the encoder HW.
* This is called by the DoEncode() function. If buffering is enabled,
* this may return without any output data.
*/
void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer, std::vector<std::vector<uint8_t>>& vPacket, bool bOutputDelay);
/**
* @brief This is a private function which is used to initialize the bitstream buffers.
* This is only used in the encoding mode.
*/
void InitializeBitstreamBuffer();
/**
* @brief This is a private function which is used to destroy the bitstream buffers.
* This is only used in the encoding mode.
*/
void DestroyBitstreamBuffer();
/**
* @brief This is a private function which is used to destroy the HW encoder.
*/
void DestroyHWEncoder();
/**
* @brief This function is used to flush the encoder queue.
*/
void FlushEncoder();
private:
/**
* @brief This is a pure virtual function which is used to allocate input buffers.
* The derived classes must implement this function.
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
/**
* @brief This is a pure virtual function which is used to destroy input buffers.
* The derived classes must implement this function.
*/
virtual void ReleaseInputBuffers() = 0;
protected:
void* m_hEncoder = nullptr;
NV_ENCODE_API_FUNCTION_LIST m_nvenc;
std::vector<NvEncInputFrame> m_vInputFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
std::vector<NvEncInputFrame> m_vReferenceFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
std::vector<void*> m_vpCompletionEvent;
int32_t m_iToSend = 0;
int32_t m_iGot = 0;
int32_t m_nEncoderBuffer = 0;
int32_t m_nOutputDelay = 0;
private:
void* m_pDevice;
NV_ENC_DEVICE_TYPE m_eDeviceType;
uint32_t m_nWidth;
uint32_t m_nHeight;
uint32_t m_nMaxEncodeWidth = 0;
uint32_t m_nMaxEncodeHeight = 0;
NV_ENC_BUFFER_FORMAT m_eBufferFormat;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
NV_ENC_CONFIG m_encodeConfig = {};
bool m_bEncoderInitialized = false;
uint32_t m_nExtraOutputDelay = 3; // To ensure encode and graphics can work in parallel, m_nExtraOutputDelay should be set to at least 1
std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
};
}}
#endif
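The static layout helpers declared above together describe how a frame is laid out inside one pitched allocation. The following is a hedged sketch, not part of this PR, of how a caller might query the NV12 layout with them; the expected values in the comments assume the usual NVIDIA SDK sample semantics (NV12 has one interleaved UV plane of (height + 1) / 2 rows starting at pitch * height).

// Hypothetical illustration only: queries the NV12 plane layout via the
// static helpers declared in NvEncoder.h. 'pitch' would normally come from
// cuMemAllocPitch.
#include <cstdint>
#include <vector>
#include "NvEncoder.h"

void DescribeNv12Layout(uint32_t width, uint32_t height, uint32_t pitch)
{
    using cv::cudacodec::NvEncoder;
    const NV_ENC_BUFFER_FORMAT fmt = NV_ENC_BUFFER_FORMAT_NV12;
    const uint32_t lumaRowBytes = NvEncoder::GetWidthInBytes(fmt, width);   // == width for 8-bit NV12
    const uint32_t chromaPitch  = NvEncoder::GetChromaPitch(fmt, pitch);    // == pitch (interleaved UV)
    const uint32_t chromaPlanes = NvEncoder::GetNumChromaPlanes(fmt);       // == 1 for NV12
    const uint32_t chromaRows   = NvEncoder::GetChromaHeight(fmt, height);  // == (height + 1) / 2
    std::vector<uint32_t> chromaOffsets; // byte offset of each chroma plane from the frame base
    NvEncoder::GetChromaSubPlaneOffsets(fmt, pitch, height, chromaOffsets); // {pitch * height} for NV12
    (void)lumaRowBytes; (void)chromaPitch; (void)chromaPlanes; (void)chromaRows;
}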

@ -0,0 +1,196 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#if defined(HAVE_NVCUVENC)
#include "NvEncoderCuda.h"
namespace cv { namespace cudacodec {
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay) :
NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, eBufferFormat, nExtraOutputDelay),
m_cuContext(cuContext)
{
if (!m_hEncoder)
{
NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE);
}
if (!m_cuContext)
{
NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
}
}
NvEncoderCuda::~NvEncoderCuda()
{
ReleaseCudaResources();
}
void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers)
{
if (!IsHWEncoderInitialized())
{
NVENC_THROW_ERROR("Encoder intialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
}
cuSafeCall(cuCtxPushCurrent(m_cuContext));
std::vector<void*> inputFrames;
for (int i = 0; i < numInputBuffers; i++)
{
CUdeviceptr pDeviceFrame;
uint32_t chromaHeight = GetNumChromaPlanes(GetPixelFormat()) * GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
cuSafeCall(cuMemAllocPitch((CUdeviceptr*)&pDeviceFrame,
&m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void*)pDeviceFrame);
}
cuSafeCall(cuCtxPopCurrent(NULL));
RegisterInputResources(inputFrames,
NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
GetMaxEncodeWidth(),
GetMaxEncodeHeight(),
(int)m_cudaPitch,
GetPixelFormat(),
false);
}
void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream)
{
NVENC_API_CALL(m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream));
}
void NvEncoderCuda::ReleaseInputBuffers()
{
ReleaseCudaResources();
}
void NvEncoderCuda::ReleaseCudaResources()
{
if (!m_hEncoder)
{
return;
}
if (!m_cuContext)
{
return;
}
UnregisterInputResources();
cuCtxPushCurrent(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i)
{
if (m_vInputFrames[i].inputPtr)
{
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i)
{
if (m_vReferenceFrames[i].inputPtr)
{
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent(NULL);
m_cuContext = nullptr;
}
void NvEncoderCuda::CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[],
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy,
CUstream stream)
{
if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE)
{
NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM);
}
cuSafeCall(cuCtxPushCurrent(device));
uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
CUDA_MEMCPY2D m = {};
m.srcMemoryType = srcMemoryType;
if (srcMemoryType == CU_MEMORYTYPE_HOST)
{
m.srcHost = pSrcFrame;
}
else
{
m.srcDevice = (CUdeviceptr)pSrcFrame;
}
m.srcPitch = srcPitch;
m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
m.dstDevice = pDstFrame;
m.dstPitch = dstPitch;
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
{
cuSafeCall(cuMemcpy2DUnaligned(&m));
}
else
{
cuSafeCall(stream == NULL ? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets);
uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch);
uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
for (uint32_t i = 0; i < numChromaPlanes; ++i)
{
if (chromaHeight)
{
if (srcMemoryType == CU_MEMORYTYPE_HOST)
{
m.srcHost = ((uint8_t*)pSrcFrame + srcChromaOffsets[i]);
}
else
{
m.srcDevice = (CUdeviceptr)((uint8_t*)pSrcFrame + srcChromaOffsets[i]);
}
m.srcPitch = srcChromaPitch;
m.dstDevice = (CUdeviceptr)((uint8_t*)pDstFrame + dstChromaOffsets[i]);
m.dstPitch = destChromaPitch;
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE)
{
cuSafeCall(cuMemcpy2DUnaligned(&m));
}
else
{
cuSafeCall(stream == NULL ? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
}
}
}
cuSafeCall(cuCtxPopCurrent(NULL));
}
}}
#endif
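AllocateInputBuffers() above reserves each input surface with a single cuMemAllocPitch call whose row count covers the luma plane plus the chroma rows that follow it. A minimal sketch of that arithmetic for an NV12 surface; AllocNv12Surface is illustrative and not a function in this PR.

// Hedged sketch mirroring NvEncoderCuda::AllocateInputBuffers for NV12 only.
#include <cuda.h>
#include <cstdint>

CUresult AllocNv12Surface(uint32_t maxWidth, uint32_t maxHeight,
                          CUdeviceptr* frame, size_t* pitch)
{
    // NV12: one interleaved UV plane of (height + 1) / 2 rows after the luma.
    const uint32_t chromaRows = (maxHeight + 1) / 2;
    return cuMemAllocPitch(frame, pitch,
                           maxWidth,               // width in bytes of an 8-bit luma row
                           maxHeight + chromaRows, // total rows: luma plane + chroma rows
                           16);                    // element size, as in AllocateInputBuffers
}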

@ -0,0 +1,75 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NVENCODERCUDA_HPP
#define OPENCV_NVENCODERCUDA_HPP
#include <vector>
#include <stdint.h>
#include <mutex>
#include <cuda.h>
#include "NvEncoder.h"
namespace cv { namespace cudacodec {
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder
{
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to device memory.
* This function assumes the YUV planes form a single contiguous memory segment.
*/
static void CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[],
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false,
CUstream stream = NULL);
/**
* @brief This function sets the input and output CUDA streams.
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for encoding.
* This function is an override of the virtual function NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of the virtual function NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};
}}
#endif
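A hedged usage sketch of this class follows. It assumes the encode-loop entry points (CreateEncoder(), GetNextInputFrame(), EncodeFrame(), EndEncode()) declared in the part of NvEncoder.h not quoted above, with the names used by the NVIDIA Video Codec SDK samples.

// Illustrative only; the commented loop relies on NvEncoder methods that are
// assumed from the SDK samples rather than shown in this diff.
#include <cuda.h>
#include "NvEncoderCuda.h"

void EncodeWithStreams(CUcontext ctx, CUstream stream, uint32_t w, uint32_t h)
{
    cv::cudacodec::NvEncoderCuda enc(ctx, w, h, NV_ENC_BUFFER_FORMAT_NV12);
    // Route NVENC's internal copies through application CUDA streams so that
    // encoding can overlap with other GPU work on the same device.
    enc.SetIOCudaStreams(reinterpret_cast<NV_ENC_CUSTREAM_PTR>(&stream),
                         reinterpret_cast<NV_ENC_CUSTREAM_PTR>(&stream));
    // ... CreateEncoder(), then per frame: copy into GetNextInputFrame() with
    // NvEncoderCuda::CopyToDeviceFrame() and call EncodeFrame(); finally
    // EndEncode() to flush (see the SDK samples for the full loop).
}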

@ -57,32 +57,38 @@
#include "opencv2/core/private.cuda.hpp"
#include <opencv2/core/utils/logger.hpp>
#ifdef HAVE_NVCUVID
#if defined(HAVE_DYNLINK_NVCUVID_HEADER)
#include <dynlink_nvcuvid.h>
#elif defined(HAVE_NVCUVID_HEADER)
#include <nvcuvid.h>
#endif
#ifdef _WIN32
#if defined(HAVE_NVCUVID) || defined(HAVE_NVCUVENC)
#if _WIN32
#define NOMINMAX
#include <windows.h>
#ifdef HAVE_NVCUVENC
#include <NVEncoderAPI.h>
#endif
#else
#include <pthread.h>
#include <unistd.h>
#endif
#if defined(HAVE_NVCUVID)
#if defined(HAVE_DYNLINK_NVCUVID_HEADER)
#include <dynlink_nvcuvid.h>
#elif defined(HAVE_NVCUVID_HEADER)
#include <nvcuvid.h>
#endif
#include "thread.hpp"
#include "video_source.hpp"
#include "ffmpeg_video_source.hpp"
#include "cuvid_video_source.hpp"
#include "frame_queue.hpp"
#include "video_decoder.hpp"
#include "video_parser.hpp"
#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#include <unistd.h>
#endif
#include "thread.hpp"
#include "video_source.hpp"
#include "ffmpeg_video_source.hpp"
#include "cuvid_video_source.hpp"
#include "frame_queue.hpp"
#include "video_decoder.hpp"
#include "video_parser.hpp"
#endif
#if defined(HAVE_NVCUVENC)
#include <fstream>
#include <nvEncodeAPI.h>
#include "NvEncoderCuda.h"
#include <opencv2/cudaimgproc.hpp>
#endif
#endif
#endif /* OPENCV_PRECOMP_H */

@ -54,6 +54,7 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, co
#else // HAVE_NVCUVID
void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream);
bool ValidColorFormat(const ColorFormat colorFormat);
void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat,
Stream stream)
@ -74,7 +75,7 @@ void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int
outFrame.create(height, width, CV_8UC1);
cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream));
}
else if (colorFormat == ColorFormat::YUV) {
else if (colorFormat == ColorFormat::NV_NV12) {
decodedFrame.copyTo(outFrame, stream);
}
}
@ -100,7 +101,7 @@ namespace
bool set(const VideoReaderProps propertyId, const double propertyVal) CV_OVERRIDE;
void set(const ColorFormat _colorFormat) CV_OVERRIDE;
bool set(const ColorFormat colorFormat_) CV_OVERRIDE;
bool get(const VideoReaderProps propertyId, double& propertyVal) const CV_OVERRIDE;
bool getVideoReaderProps(const VideoReaderProps propertyId, double& propertyValOut, double propertyValIn) const CV_OVERRIDE;
@ -273,8 +274,16 @@ namespace
return false;
}
void VideoReaderImpl::set(const ColorFormat _colorFormat) {
colorFormat = _colorFormat;
bool ValidColorFormat(const ColorFormat colorFormat) {
if (colorFormat == ColorFormat::BGRA || colorFormat == ColorFormat::BGR || colorFormat == ColorFormat::GRAY || colorFormat == ColorFormat::NV_NV12)
return true;
return false;
}
bool VideoReaderImpl::set(const ColorFormat colorFormat_) {
if (!ValidColorFormat(colorFormat_)) return false;
colorFormat = colorFormat_;
return true;
}
bool VideoReaderImpl::get(const VideoReaderProps propertyId, double& propertyVal) const {
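The hunk above replaces the void set(ColorFormat) with a bool-returning overload gated by ValidColorFormat(), so callers can now detect unsupported output formats. A short sketch of the caller-side check, mirroring the assertions in test_video.cpp below:

// Only BGRA, BGR, GRAY and NV_NV12 pass ValidColorFormat(); RGB is rejected.
#include <opencv2/cudacodec.hpp>

void selectOutputFormat(cv::Ptr<cv::cudacodec::VideoReader> reader)
{
    using cv::cudacodec::ColorFormat;
    CV_Assert(!reader->set(ColorFormat::RGB));    // unsupported -> returns false
    CV_Assert(reader->set(ColorFormat::NV_NV12)); // raw NV12 output -> returns true
}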

File diff suppressed because it is too large

@ -143,8 +143,8 @@ CUDA_TEST_P(CheckExtraData, Reader)
ASSERT_EQ(extraDataIdx, 1 );
ASSERT_TRUE(reader->grab());
cv::Mat extraData;
const bool newData = reader->retrieve(extraData, extraDataIdx);
ASSERT_TRUE(newData && sz || !newData && !sz);
const bool newData = reader->retrieve(extraData, static_cast<size_t>(extraDataIdx));
ASSERT_TRUE((newData && sz) || (!newData && !sz));
ASSERT_EQ(extraData.total(), sz);
}
@ -170,11 +170,11 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
ASSERT_TRUE(reader->grab());
double N = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
for (int i = rawIdxBase; i < N + rawIdxBase; i++) {
for (int i = static_cast<int>(rawIdxBase); i < static_cast<int>(N + rawIdxBase); i++) {
nPackages++;
double containsKeyFrame = i;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME, containsKeyFrame));
ASSERT_TRUE(nPackages == 1 && containsKeyFrame || nPackages == 2 && !containsKeyFrame) << "nPackage: " << i;
ASSERT_TRUE((nPackages == 1 && containsKeyFrame) || (nPackages == 2 && !containsKeyFrame)) << "nPackage: " << i;
if (nPackages >= maxNPackagesToCheck)
break;
}
@ -192,17 +192,18 @@ CUDA_TEST_P(Scaling, Reader)
GpuMat frameOr;
{
cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
readerGs->set(cudacodec::ColorFormat::GRAY);
ASSERT_TRUE(readerGs->set(cudacodec::ColorFormat::GRAY));
ASSERT_TRUE(readerGs->nextFrame(frameOr));
}
cudacodec::VideoReaderInitParams params;
params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
params.targetSz.height * targetRoiIn.height);
params.targetSz = Size(static_cast<int>(frameOr.cols * targetSzIn.width), static_cast<int>(frameOr.rows * targetSzIn.height));
params.srcRoi = Rect(static_cast<int>(frameOr.cols * srcRoiIn.x), static_cast<int>(frameOr.rows * srcRoiIn.y), static_cast<int>(frameOr.cols * srcRoiIn.width),
static_cast<int>(frameOr.rows * srcRoiIn.height));
params.targetRoi = Rect(static_cast<int>(params.targetSz.width * targetRoiIn.x), static_cast<int>(params.targetSz.height * targetRoiIn.y),
static_cast<int>(params.targetSz.width * targetRoiIn.width), static_cast<int>(params.targetSz.height * targetRoiIn.height));
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
reader->set(cudacodec::ColorFormat::GRAY);
ASSERT_TRUE(reader->set(cudacodec::ColorFormat::GRAY));
GpuMat frame;
ASSERT_TRUE(reader->nextFrame(frame));
const cudacodec::FormatInfo format = reader->format();
@ -239,24 +240,25 @@ CUDA_TEST_P(Video, Reader)
{cudacodec::ColorFormat::GRAY,1},
{cudacodec::ColorFormat::BGR,3},
{cudacodec::ColorFormat::BGRA,4},
{cudacodec::ColorFormat::YUV,1}
{cudacodec::ColorFormat::NV_NV12,1}
};
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
ASSERT_FALSE(reader->set(cudacodec::ColorFormat::RGB));
cv::cudacodec::FormatInfo fmt = reader->format();
cv::cuda::GpuMat frame;
for (int i = 0; i < 10; i++)
{
// request a different colour format for each frame
const std::pair< cudacodec::ColorFormat, int>& formatToChannels = formatsToChannels[i % formatsToChannels.size()];
reader->set(formatToChannels.first);
ASSERT_TRUE(reader->set(formatToChannels.first));
double colorFormat;
ASSERT_TRUE(reader->get(cudacodec::VideoReaderProps::PROP_COLOR_FORMAT, colorFormat) && static_cast<cudacodec::ColorFormat>(colorFormat) == formatToChannels.first);
ASSERT_TRUE(reader->nextFrame(frame));
if(!fmt.valid)
fmt = reader->format();
const int height = formatToChannels.first == cudacodec::ColorFormat::YUV ? 1.5 * fmt.height : fmt.height;
const int height = formatToChannels.first == cudacodec::ColorFormat::NV_NV12 ? static_cast<int>(1.5 * fmt.height) : fmt.height;
ASSERT_TRUE(frame.cols == fmt.width && frame.rows == height);
ASSERT_FALSE(frame.empty());
ASSERT_TRUE(frame.channels() == formatToChannels.second);
@ -291,9 +293,9 @@ CUDA_TEST_P(VideoReadRaw, Reader)
double N = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
ASSERT_TRUE(N >= 0) << N << " < 0";
for (int i = rawIdxBase; i <= N + rawIdxBase; i++) {
for (int j = static_cast<int>(rawIdxBase); j <= static_cast<int>(N + rawIdxBase); j++) {
Mat rawPackets;
reader->retrieve(rawPackets, i);
reader->retrieve(rawPackets, j);
file.write((char*)rawPackets.data, rawPackets.total());
}
}
@ -315,7 +317,7 @@ CUDA_TEST_P(VideoReadRaw, Reader)
{
ASSERT_TRUE(readerReference->nextFrame(reference));
ASSERT_TRUE(readerActual->grab());
ASSERT_TRUE(readerActual->retrieve(actual, decodedFrameIdx));
ASSERT_TRUE(readerActual->retrieve(actual, static_cast<size_t>(decodedFrameIdx)));
actual.download(actualHost);
reference.download(referenceHost);
ASSERT_TRUE(cvtest::norm(actualHost, referenceHost, NORM_INF) == 0);
@ -423,63 +425,242 @@ CUDA_TEST_P(CheckInitParams, Reader)
#endif // HAVE_NVCUVID
#if defined(_WIN32) && defined(HAVE_NVCUVENC)
//////////////////////////////////////////////////////
// VideoWriter
#if defined(HAVE_NVCUVID) && defined(HAVE_NVCUVENC)
struct TransCode : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(Video, Writer)
CUDA_TEST_P(TransCode, H264ToH265)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.h264";
constexpr cv::cudacodec::ColorFormat colorFormat = cv::cudacodec::ColorFormat::NV_NV12;
constexpr double fps = 25;
const cudacodec::Codec codec = cudacodec::Codec::HEVC;
const std::string ext = ".h265";
const std::string outputFile = cv::tempfile(ext.c_str());
constexpr int nFrames = 5;
Size frameSz;
{
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
cv::cudacodec::FormatInfo fmt = reader->format();
reader->set(cudacodec::ColorFormat::NV_NV12);
cv::Ptr<cv::cudacodec::VideoWriter> writer;
cv::cuda::GpuMat frame;
cv::cuda::Stream stream;
for (int i = 0; i < nFrames; ++i) {
ASSERT_TRUE(reader->nextFrame(frame, stream));
if (!fmt.valid) {
fmt = reader->format();
ASSERT_TRUE(fmt.valid);
}
ASSERT_FALSE(frame.empty());
Mat tst; frame.download(tst);
if (writer.empty()) {
frameSz = Size(fmt.width, fmt.height);
writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, 0, stream);
}
writer->write(frame);
}
}
const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
{
cv::VideoCapture cap(outputFile);
ASSERT_TRUE(cap.isOpened());
const int width = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH));
const int height = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT));
ASSERT_EQ(frameSz, Size(width, height));
ASSERT_EQ(fps, cap.get(CAP_PROP_FPS));
Mat frame;
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
}
}
ASSERT_EQ(0, remove(outputFile.c_str()));
}
std::string outputFile = cv::tempfile(".avi");
const double FPS = 25.0;
INSTANTIATE_TEST_CASE_P(CUDA_Codec, TransCode, ALL_DEVICES);
#endif
#if defined(HAVE_NVCUVENC)
//////////////////////////////////////////////////////
// VideoWriter
//==========================================================================
cv::VideoCapture reader(inputFile);
ASSERT_TRUE(reader.isOpened());
void CvtColor(const Mat& in, Mat& out, const cudacodec::ColorFormat surfaceFormatCv) {
switch (surfaceFormatCv) {
case(cudacodec::ColorFormat::RGB):
return cv::cvtColor(in, out, COLOR_BGR2RGB);
case(cudacodec::ColorFormat::BGRA):
return cv::cvtColor(in, out, COLOR_BGR2BGRA);
case(cudacodec::ColorFormat::RGBA):
return cv::cvtColor(in, out, COLOR_BGR2RGBA);
case(cudacodec::ColorFormat::GRAY):
return cv::cvtColor(in, out, COLOR_BGR2GRAY);
default:
in.copyTo(out);
}
}
cv::Ptr<cv::cudacodec::VideoWriter> d_writer;
PARAM_TEST_CASE(Write, cv::cuda::DeviceInfo, bool, cv::cudacodec::Codec, double, cv::cudacodec::ColorFormat)
{
};
cv::Mat frame;
cv::cuda::GpuMat d_frame;
CUDA_TEST_P(Write, Writer)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
const bool deviceSrc = GET_PARAM(1);
const cudacodec::Codec codec = GET_PARAM(2);
const double fps = GET_PARAM(3);
const cv::cudacodec::ColorFormat colorFormat = GET_PARAM(4);
const std::string ext = codec == cudacodec::Codec::H264 ? ".h264" : ".hevc";
const std::string outputFile = cv::tempfile(ext.c_str());
constexpr int nFrames = 5;
Size frameSz;
{
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Ptr<cv::cudacodec::VideoWriter> writer;
cv::Mat frame, frameNewSf;
cv::cuda::GpuMat dFrame;
cv::cuda::Stream stream;
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
if (writer.empty()) {
frameSz = frame.size();
writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, 0, stream);
}
CvtColor(frame, frameNewSf, colorFormat);
if (deviceSrc) {
dFrame.upload(frameNewSf);
writer->write(dFrame);
}
else
writer->write(frameNewSf);
}
}
for (int i = 0; i < 10; ++i)
{
reader >> frame;
ASSERT_FALSE(frame.empty());
cv::VideoCapture cap(outputFile);
ASSERT_TRUE(cap.isOpened());
const int width = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH));
const int height = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT));
ASSERT_EQ(frameSz, Size(width, height));
ASSERT_TRUE(abs(fps - cap.get(CAP_PROP_FPS)) < 0.5);
Mat frame;
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
}
}
ASSERT_EQ(0, remove(outputFile.c_str()));
}
d_frame.upload(frame);
#define DEVICE_SRC true, false
#define FPS 10, 29.7
#define CODEC cv::cudacodec::Codec::H264, cv::cudacodec::Codec::HEVC
#define COLOR_FORMAT cv::cudacodec::ColorFormat::BGR, cv::cudacodec::ColorFormat::RGB, cv::cudacodec::ColorFormat::BGRA, \
cv::cudacodec::ColorFormat::RGBA, cv::cudacodec::ColorFormat::GRAY
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Write, testing::Combine(ALL_DEVICES, testing::Values(DEVICE_SRC), testing::Values(CODEC), testing::Values(FPS),
testing::Values(COLOR_FORMAT)));
if (d_writer.empty())
d_writer = cv::cudacodec::createVideoWriter(outputFile, frame.size(), FPS);
d_writer->write(d_frame);
struct EncoderParams : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
cv::cudacodec::EncoderParams params;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
// Fixed params for CBR test
params.nvPreset = cv::cudacodec::EncodePreset::ENC_PRESET_P7;
params.tuningInfo = cv::cudacodec::EncodeTuningInfo::ENC_TUNING_INFO_HIGH_QUALITY;
params.encodingProfile = cv::cudacodec::EncodeProfile::ENC_H264_PROFILE_MAIN;
params.rateControlMode = cv::cudacodec::EncodeParamsRcMode::ENC_PARAMS_RC_CBR;
params.multiPassEncoding = cv::cudacodec::EncodeMultiPass::ENC_TWO_PASS_FULL_RESOLUTION;
params.averageBitRate = 1000000;
params.maxBitRate = 0;
params.targetQuality = 0;
params.gopLength = 5;
}
};
reader.release();
d_writer.release();
reader.open(outputFile);
ASSERT_TRUE(reader.isOpened());
CUDA_TEST_P(EncoderParams, Writer)
{
const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
constexpr double fps = 25.0;
constexpr cudacodec::Codec codec = cudacodec::Codec::H264;
const std::string ext = ".h264";
const std::string outputFile = cv::tempfile(ext.c_str());
Size frameSz;
constexpr int nFrames = 5;
{
cv::VideoCapture reader(inputFile);
ASSERT_TRUE(reader.isOpened());
const cv::cudacodec::ColorFormat colorFormat = cv::cudacodec::ColorFormat::BGR;
cv::Ptr<cv::cudacodec::VideoWriter> writer;
cv::Mat frame;
cv::cuda::GpuMat dFrame;
cv::cuda::Stream stream;
for (int i = 0; i < nFrames; ++i) {
reader >> frame;
ASSERT_FALSE(frame.empty());
dFrame.upload(frame);
if (writer.empty()) {
frameSz = frame.size();
writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, params, 0, stream);
cv::cudacodec::EncoderParams paramsOut = writer->getEncoderParams();
ASSERT_EQ(params, paramsOut);
}
writer->write(dFrame);
}
}
for (int i = 0; i < 5; ++i)
{
reader >> frame;
ASSERT_FALSE(frame.empty());
cv::VideoCapture cap(outputFile);
ASSERT_TRUE(cap.isOpened());
const int width = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH));
const int height = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT));
ASSERT_EQ(frameSz, Size(width, height));
ASSERT_EQ(fps, cap.get(CAP_PROP_FPS));
const bool checkGop = videoio_registry::hasBackend(CAP_FFMPEG);
Mat frame;
for (int i = 0; i < nFrames; ++i) {
cap >> frame;
ASSERT_FALSE(frame.empty());
if (checkGop && (cap.get(CAP_PROP_FRAME_TYPE) == 73)) {
ASSERT_TRUE(i % params.gopLength == 0);
}
}
}
ASSERT_EQ(0, remove(outputFile.c_str()));
}
#endif // _WIN32, HAVE_NVCUVENC
INSTANTIATE_TEST_CASE_P(CUDA_Codec, EncoderParams, ALL_DEVICES);
#endif // HAVE_NVCUVENC
INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
ALL_DEVICES,
testing::Values("highgui/video/big_buck_bunny.mp4")));
#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4"
#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8)
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5)
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7)
#define TARGET_SZ Size2f(1,1), Size2f(0.8f,0.9f), Size2f(2.3f,1.8f)
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25f,0.25f,0.5f,0.5f)
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2f,0.3f,0.6f,0.7f)
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine(
ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));
