Merge branch 4.x

pull/3142/head
Alexander Alekhin 3 years ago
commit 92dccacae8
99 changed files:

  1. modules/aruco/CMakeLists.txt (29 changed lines)
  2. modules/aruco/include/opencv2/aruco.hpp (2 changed lines)
  3. modules/aruco/include/opencv2/aruco/dictionary.hpp (10 changed lines)
  4. modules/aruco/misc/pattern_generator/MarkerPrinterGUI.py (8 changed lines)
  5. modules/aruco/samples/aruco_samples_utility.hpp (48 changed lines)
  6. modules/aruco/samples/calibrate_camera.cpp (100 changed lines)
  7. modules/aruco/samples/calibrate_camera_charuco.cpp (101 changed lines)
  8. modules/aruco/samples/create_board.cpp (25 changed lines)
  9. modules/aruco/samples/create_board_charuco.cpp (26 changed lines)
  10. modules/aruco/samples/create_marker.cpp (26 changed lines)
  11. modules/aruco/samples/detect_board.cpp (76 changed lines)
  12. modules/aruco/samples/detect_board_charuco.cpp (72 changed lines)
  13. modules/aruco/samples/detect_diamonds.cpp (77 changed lines)
  14. modules/aruco/samples/detect_markers.cpp (78 changed lines)
  15. modules/aruco/samples/detector_params.yml (1 changed line)
  16. modules/aruco/samples/tutorial_camera_charuco.yml (21 changed lines)
  17. modules/aruco/samples/tutorial_camera_params.yml (14 changed lines)
  18. modules/aruco/samples/tutorial_charuco_create_detect.cpp (24 changed lines)
  19. modules/aruco/samples/tutorial_dict.yml (38 changed lines)
  20. modules/aruco/src/aruco.cpp (42 changed lines)
  21. modules/aruco/src/dictionary.cpp (30 changed lines)
  22. modules/aruco/test/test_arucodetection.cpp (92 changed lines)
  23. modules/aruco/test/test_charucodetection.cpp (116 changed lines)
  24. modules/aruco/test/test_main.cpp (4 changed lines)
  25. modules/aruco/tutorials/aruco_board_detection/aruco_board_detection.markdown (30 changed lines)
  26. modules/aruco/tutorials/aruco_calibration/aruco_calibration.markdown (13 changed lines)
  27. modules/aruco/tutorials/aruco_calibration/images/img_00.jpg (binary)
  28. modules/aruco/tutorials/aruco_calibration/images/img_01.jpg (binary)
  29. modules/aruco/tutorials/aruco_calibration/images/img_02.jpg (binary)
  30. modules/aruco/tutorials/aruco_calibration/images/img_03.jpg (binary)
  31. modules/aruco/tutorials/aruco_detection/aruco_detection.markdown (34 changed lines)
  32. modules/aruco/tutorials/aruco_faq/aruco_faq.markdown (2 changed lines)
  33. modules/aruco/tutorials/charuco_detection/charuco_detection.markdown (30 changed lines)
  34. modules/aruco/tutorials/charuco_detection/images/chaxis.jpg (binary)
  35. modules/aruco/tutorials/charuco_detection/images/chaxis.png (binary)
  36. modules/aruco/tutorials/charuco_detection/images/chcorners.jpg (binary)
  37. modules/aruco/tutorials/charuco_detection/images/chcorners.png (binary)
  38. modules/aruco/tutorials/charuco_detection/images/chocclusion.jpg (binary)
  39. modules/aruco/tutorials/charuco_detection/images/chocclusion.png (binary)
  40. modules/aruco/tutorials/charuco_detection/images/chocclusion_original.jpg (binary)
  41. modules/aruco/tutorials/charuco_detection/images/choriginal.jpg (binary)
  42. modules/aruco/tutorials/charuco_detection/images/choriginal.png (binary)
  43. modules/aruco/tutorials/charuco_diamond_detection/charuco_diamond_detection.markdown (18 changed lines)
  44. modules/barcode/src/barcode.cpp (2 changed lines)
  45. modules/cudaarithm/include/opencv2/cudaarithm.hpp (10 changed lines)
  46. modules/cudacodec/include/opencv2/cudacodec.hpp (130 changed lines)
  47. modules/cudacodec/src/cuvid_video_source.cpp (9 changed lines)
  48. modules/cudacodec/src/cuvid_video_source.hpp (2 changed lines)
  49. modules/cudacodec/src/ffmpeg_video_source.cpp (65 changed lines)
  50. modules/cudacodec/src/ffmpeg_video_source.hpp (13 changed lines)
  51. modules/cudacodec/src/frame_queue.cpp (43 changed lines)
  52. modules/cudacodec/src/frame_queue.hpp (34 changed lines)
  53. modules/cudacodec/src/video_decoder.cpp (46 changed lines)
  54. modules/cudacodec/src/video_decoder.hpp (31 changed lines)
  55. modules/cudacodec/src/video_parser.cpp (45 changed lines)
  56. modules/cudacodec/src/video_parser.hpp (3 changed lines)
  57. modules/cudacodec/src/video_reader.cpp (146 changed lines)
  58. modules/cudacodec/src/video_source.cpp (32 changed lines)
  59. modules/cudacodec/src/video_source.hpp (38 changed lines)
  60. modules/cudacodec/test/test_video.cpp (245 changed lines)
  61. modules/cudaoptflow/src/nvidiaOpticalFlow.cpp (35 changed lines)
  62. modules/cudaoptflow/src/tvl1flow.cpp (9 changed lines)
  63. modules/cudaoptflow/test/test_optflow.cpp (2 changed lines)
  64. modules/cudastereo/perf/perf_stereo.cpp (39 changed lines)
  65. modules/cudastereo/src/cuda/stereobm.cu (296 changed lines)
  66. modules/cudastereo/src/stereobm.cpp (28 changed lines)
  67. modules/cudastereo/test/test_stereo.cpp (78 changed lines)
  68. modules/cudawarping/src/cuda/warp.cu (96 changed lines)
  69. modules/cudawarping/test/test_remap.cpp (26 changed lines)
  70. modules/cudev/include/opencv2/cudev/functional/detail/color_cvt.hpp (6 changed lines)
  71. modules/cvv/CMakeLists.txt (2 changed lines)
  72. modules/freetype/src/freetype.cpp (33 changed lines)
  73. modules/julia/CMakeLists.txt (2 changed lines)
  74. modules/ovis/src/ovis.cpp (11 changed lines)
  75. modules/rgbd/CMakeLists.txt (4 changed lines)
  76. modules/sfm/CMakeLists.txt (5 changed lines)
  77. modules/text/samples/textdetection.py (22 changed lines)
  78. modules/viz/samples/viz_sample_02.py (17 changed lines)
  79. modules/viz/samples/viz_sample_03.py (18 changed lines)
  80. modules/wechat_qrcode/README.md (2 changed lines)
  81. modules/wechat_qrcode/src/zxing/common/bitarray.cpp (6 changed lines)
  82. modules/ximgproc/doc/ximgproc.bib (9 changed lines)
  83. modules/ximgproc/include/opencv2/ximgproc.hpp (2 changed lines)
  84. modules/ximgproc/include/opencv2/ximgproc/edge_drawing.hpp (6 changed lines)
  85. modules/ximgproc/include/opencv2/ximgproc/radon_transform.hpp (40 changed lines)
  86. modules/ximgproc/include/opencv2/ximgproc/scansegment.hpp (83 changed lines)
  87. modules/ximgproc/perf/perf_radon_transform.cpp (35 changed lines)
  88. modules/ximgproc/samples/radon_transform_demo.cpp (18 changed lines)
  89. modules/ximgproc/samples/radon_transform_demo.py (13 changed lines)
  90. modules/ximgproc/src/edge_drawing.cpp (10 changed lines)
  91. modules/ximgproc/src/edge_drawing_common.hpp (6 changed lines)
  92. modules/ximgproc/src/radon_transform.cpp (81 changed lines)
  93. modules/ximgproc/src/scansegment.cpp (770 changed lines)
  94. modules/ximgproc/test/test_radon_transform.cpp (81 changed lines)
  95. modules/ximgproc/test/test_scansegment.cpp (35 changed lines)
  96. modules/xobjdetect/src/lbpfeatures.cpp (4 changed lines)
  97. modules/xobjdetect/src/precomp.hpp (5 changed lines)
  98. modules/xobjdetect/src/waldboost.cpp (8 changed lines)
  99. modules/xobjdetect/src/wbdetector.cpp (8 changed lines)

@@ -1,2 +1,31 @@
set(the_description "ArUco Marker Detection")
ocv_define_module(aruco opencv_core opencv_imgproc opencv_3d opencv_calib WRAP python java objc js)
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
ocv_add_testdata(samples/ contrib/aruco
FILES_MATCHING PATTERN "*yml"
)
ocv_add_testdata(tutorials/aruco_detection/images/ contrib/aruco
FILES_MATCHING PATTERN "singlemarkersoriginal.jpg"
)
ocv_add_testdata(tutorials/aruco_board_detection/images/ contrib/aruco
FILES_MATCHING PATTERN "gboriginal.png"
)
ocv_add_testdata(tutorials/charuco_detection/images/ contrib/aruco
FILES_MATCHING PATTERN "choriginal.jpg"
)
ocv_add_testdata(tutorials/charuco_detection/images/ contrib/aruco
FILES_MATCHING PATTERN "chocclusion_original.jpg"
)
ocv_add_testdata(tutorials/charuco_diamond_detection/images/ contrib/aruco
FILES_MATCHING PATTERN "diamondmarkers.png"
)
ocv_add_testdata(tutorials/aruco_calibration/images/ contrib/aruco
FILES_MATCHING REGEX "img_[0-9]+.jpg"
)

@@ -150,8 +150,8 @@ enum CornerRefineMethod{
struct CV_EXPORTS_W DetectorParameters {
DetectorParameters();
CV_WRAP static Ptr<DetectorParameters> create();
CV_WRAP static bool readDetectorParameters(const FileNode& fn, Ptr<DetectorParameters>& params);
CV_PROP_RW int adaptiveThreshWinSizeMin;
CV_PROP_RW int adaptiveThreshWinSizeMax;
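The new static readDetectorParameters entry point replaces the hand-written YAML parsers deleted from every sample further down. A minimal sketch of the call pattern, as the updated samples use it (the file name is a placeholder):

    #include <iostream>
    #include <opencv2/aruco.hpp>

    int main() {
        cv::FileStorage fs("detector_params.yml", cv::FileStorage::READ); // placeholder path
        cv::Ptr<cv::aruco::DetectorParameters> detectorParams;
        // Seeds detectorParams with defaults, then overrides each key present in the file.
        if (!cv::aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams)) {
            std::cerr << "Invalid detector parameters file" << std::endl;
            return 1;
        }
        return 0;
    }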

@@ -93,6 +93,16 @@ class CV_EXPORTS_W Dictionary {
CV_WRAP_AS(create_from) static Ptr<Dictionary> create(int nMarkers, int markerSize,
const Ptr<Dictionary> &baseDictionary, int randomSeed=0);
/**
* @brief Read a new dictionary from FileNode. Format:
* nmarkers: 35
* markersize: 6
* marker_0: "101011111011111001001001101100000000"
* ...
* marker_34: "011111010000111011111110110101100101"
*/
CV_WRAP static bool readDictionary(const cv::FileNode& fn, cv::Ptr<cv::aruco::Dictionary> &dictionary);
/**
* @see getPredefinedDictionary
*/
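Dictionary::readDictionary is the matching loader for the file format documented above; the samples below all adopt the same pattern. A minimal sketch (the file name is a placeholder):

    #include <iostream>
    #include <opencv2/aruco/dictionary.hpp>

    int main() {
        cv::FileStorage fs("my_dictionary.yml", cv::FileStorage::READ); // placeholder path
        cv::Ptr<cv::aruco::Dictionary> dictionary;
        // Builds the dictionary from the nmarkers/markersize/marker_N entries.
        if (!cv::aruco::Dictionary::readDictionary(fs.root(), dictionary)) {
            std::cerr << "Invalid dictionary file" << std::endl;
            return 1;
        }
        return 0;
    }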

@@ -155,7 +155,7 @@ class MarkerPrinterGUI:
tk.Button(self.charucoMarkerUIFrame2, text = "Preview", command = self.OnPreviewCharucoMarker).grid(row=1, column=0, sticky = tk.NSEW)
tk.Button(self.charucoMarkerUIFrame2, text = "Save", command = self.OnSaveCharucoMarker).grid(row=1, column=1, sticky = tk.NSEW)
tk.Label(self.charucoMarkerUIFrame2, text="Save opetions:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.charucoMarkerUIFrame2, text="Save options:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.charucoMarkerUIFrame2, text="(set 0 as disable)").grid(row=1, column=2, sticky = tk.NSEW)
tk.Label(self.charucoMarkerUIFrame2, text="subSizeX").grid(row=0, column=3, sticky = tk.NSEW)
tk.Label(self.charucoMarkerUIFrame2, text="subSizeY").grid(row=0, column=4, sticky = tk.NSEW)
@@ -279,7 +279,7 @@ class MarkerPrinterGUI:
tk.Button(self.arucoGridMarkerUIFrame2, text = "Preview", command = self.OnPreviewArucoGridMarker).grid(row=1, column=0, sticky = tk.NSEW)
tk.Button(self.arucoGridMarkerUIFrame2, text = "Save", command = self.OnSaveArucoGridMarker).grid(row=1, column=1, sticky = tk.NSEW)
tk.Label(self.arucoGridMarkerUIFrame2, text="Save opetions:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.arucoGridMarkerUIFrame2, text="Save options:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.arucoGridMarkerUIFrame2, text="(set 0 as disable)").grid(row=1, column=2, sticky = tk.NSEW)
tk.Label(self.arucoGridMarkerUIFrame2, text="subSizeX").grid(row=0, column=3, sticky = tk.NSEW)
tk.Label(self.arucoGridMarkerUIFrame2, text="subSizeY").grid(row=0, column=4, sticky = tk.NSEW)
@@ -386,7 +386,7 @@ class MarkerPrinterGUI:
tk.Button(self.arucoMarkerUIFrame2, text = "Preview", command = self.OnPreviewArucoMarker).grid(row=0, column=0, sticky = tk.NSEW)
tk.Button(self.arucoMarkerUIFrame2, text = "Save", command = self.OnSaveArucoMarker).grid(row=0, column=1, sticky = tk.NSEW)
tk.Label(self.arucoMarkerUIFrame2, text="Save opetions:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.arucoMarkerUIFrame2, text="Save options:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.arucoMarkerUIFrame2, text="(set 0 as disable)").grid(row=1, column=2, sticky = tk.NSEW)
tk.Label(self.arucoMarkerUIFrame2, text="pageBorderX (Unit: Meter)").grid(row=0, column=3, sticky = tk.NSEW)
tk.Label(self.arucoMarkerUIFrame2, text="pageBorderY (Unit: Meter)").grid(row=0, column=4, sticky = tk.NSEW)
@@ -477,7 +477,7 @@ class MarkerPrinterGUI:
tk.Button(self.chessMarkerUIFrame2, text = "Preview", command = self.OnPreviewChessMarker).grid(row=1, column=0, sticky = tk.NSEW)
tk.Button(self.chessMarkerUIFrame2, text = "Save", command = self.OnSaveChessMarker).grid(row=1, column=1, sticky = tk.NSEW)
tk.Label(self.chessMarkerUIFrame2, text="Save opetions:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.chessMarkerUIFrame2, text="Save options:").grid(row=0, column=2, sticky = tk.NSEW)
tk.Label(self.chessMarkerUIFrame2, text="(set 0 as disable)").grid(row=1, column=2, sticky = tk.NSEW)
tk.Label(self.chessMarkerUIFrame2, text="subSizeX").grid(row=0, column=3, sticky = tk.NSEW)
tk.Label(self.chessMarkerUIFrame2, text="subSizeY").grid(row=0, column=4, sticky = tk.NSEW)

@@ -0,0 +1,48 @@
#include <opencv2/highgui.hpp>
#include <opencv2/aruco.hpp>
#include <opencv2/calib3d.hpp>
#include <ctime>
namespace {
inline static bool readCameraParameters(std::string filename, cv::Mat &camMatrix, cv::Mat &distCoeffs) {
cv::FileStorage fs(filename, cv::FileStorage::READ);
if (!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return true;
}
inline static bool saveCameraParams(const std::string &filename, cv::Size imageSize, float aspectRatio, int flags,
const cv::Mat &cameraMatrix, const cv::Mat &distCoeffs, double totalAvgErr) {
cv::FileStorage fs(filename, cv::FileStorage::WRITE);
if (!fs.isOpened())
return false;
time_t tt;
time(&tt);
struct tm *t2 = localtime(&tt);
char buf[1024];
strftime(buf, sizeof(buf) - 1, "%c", t2);
fs << "calibration_time" << buf;
fs << "image_width" << imageSize.width;
fs << "image_height" << imageSize.height;
if (flags & cv::CALIB_FIX_ASPECT_RATIO) fs << "aspectRatio" << aspectRatio;
if (flags != 0) {
sprintf(buf, "flags: %s%s%s%s",
flags & cv::CALIB_USE_INTRINSIC_GUESS ? "+use_intrinsic_guess" : "",
flags & cv::CALIB_FIX_ASPECT_RATIO ? "+fix_aspectRatio" : "",
flags & cv::CALIB_FIX_PRINCIPAL_POINT ? "+fix_principal_point" : "",
flags & cv::CALIB_ZERO_TANGENT_DIST ? "+zero_tangent_dist" : "");
}
fs << "flags" << flags;
fs << "camera_matrix" << cameraMatrix;
fs << "distortion_coefficients" << distCoeffs;
fs << "avg_reprojection_error" << totalAvgErr;
return true;
}
}
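This new header gathers the readCameraParameters/saveCameraParams helpers that each sample previously duplicated. Typical use inside a sample, sketched under the assumption that camera.yml was written by one of the calibration samples:

    #include <iostream>
    #include "aruco_samples_utility.hpp"

    int main() {
        cv::Mat camMatrix, distCoeffs;
        // readCameraParameters comes from the anonymous namespace above.
        if (!readCameraParameters("camera.yml", camMatrix, distCoeffs)) { // placeholder path
            std::cerr << "Invalid camera parameters file" << std::endl;
            return 1;
        }
        return 0;
    }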

@@ -45,6 +45,7 @@ the use of this software, even if advised of the possibility of such damage.
#include <vector>
#include <iostream>
#include <ctime>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
@@ -65,6 +66,7 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{@outfile |<none> | Output file with calibrated camera parameters }"
"{v | | Input from video file, if ommited, input comes from camera }"
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
@@ -75,80 +77,7 @@ const char* keys =
"{pc | false | Fix the principal point at the center }";
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
/**
*/
static bool saveCameraParams(const string &filename, Size imageSize, float aspectRatio, int flags,
const Mat &cameraMatrix, const Mat &distCoeffs, double totalAvgErr) {
FileStorage fs(filename, FileStorage::WRITE);
if(!fs.isOpened())
return false;
time_t tt;
time(&tt);
struct tm *t2 = localtime(&tt);
char buf[1024];
strftime(buf, sizeof(buf) - 1, "%c", t2);
fs << "calibration_time" << buf;
fs << "image_width" << imageSize.width;
fs << "image_height" << imageSize.height;
if(flags & CALIB_FIX_ASPECT_RATIO) fs << "aspectRatio" << aspectRatio;
if(flags != 0) {
sprintf(buf, "flags: %s%s%s%s",
flags & CALIB_USE_INTRINSIC_GUESS ? "+use_intrinsic_guess" : "",
flags & CALIB_FIX_ASPECT_RATIO ? "+fix_aspectRatio" : "",
flags & CALIB_FIX_PRINCIPAL_POINT ? "+fix_principal_point" : "",
flags & CALIB_ZERO_TANGENT_DIST ? "+zero_tangent_dist" : "");
}
fs << "flags" << flags;
fs << "camera_matrix" << cameraMatrix;
fs << "distortion_coefficients" << distCoeffs;
fs << "avg_reprojection_error" << totalAvgErr;
return true;
}
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -162,7 +91,6 @@ int main(int argc, char *argv[]) {
int markersY = parser.get<int>("h");
float markerLength = parser.get<float>("l");
float markerSeparation = parser.get<float>("s");
int dictionaryId = parser.get<int>("d");
string outputFile = parser.get<String>(0);
int calibrationFlags = 0;
@@ -174,9 +102,10 @@ int main(int argc, char *argv[]) {
if(parser.get<bool>("zt")) calibrationFlags |= CALIB_ZERO_TANGENT_DIST;
if(parser.get<bool>("pc")) calibrationFlags |= CALIB_FIX_PRINCIPAL_POINT;
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
@@ -206,8 +135,23 @@ int main(int argc, char *argv[]) {
waitTime = 10;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
cerr << "Invalid dictionary file" << endl;
return 0;
}
}
else {
cerr << "Dictionary not specified" << endl;
return 0;
}
// create board object
Ptr<aruco::GridBoard> gridboard =

@@ -44,7 +44,7 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/imgproc.hpp>
#include <vector>
#include <iostream>
#include <ctime>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
@@ -64,6 +64,7 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{@outfile |<none> | Output file with calibrated camera parameters }"
"{v | | Input from video file, if ommited, input comes from camera }"
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
@@ -75,80 +76,7 @@ const char* keys =
"{sc | false | Show detected chessboard corners after calibration }";
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
/**
*/
static bool saveCameraParams(const string &filename, Size imageSize, float aspectRatio, int flags,
const Mat &cameraMatrix, const Mat &distCoeffs, double totalAvgErr) {
FileStorage fs(filename, FileStorage::WRITE);
if(!fs.isOpened())
return false;
time_t tt;
time(&tt);
struct tm *t2 = localtime(&tt);
char buf[1024];
strftime(buf, sizeof(buf) - 1, "%c", t2);
fs << "calibration_time" << buf;
fs << "image_width" << imageSize.width;
fs << "image_height" << imageSize.height;
if(flags & CALIB_FIX_ASPECT_RATIO) fs << "aspectRatio" << aspectRatio;
if(flags != 0) {
sprintf(buf, "flags: %s%s%s%s",
flags & CALIB_USE_INTRINSIC_GUESS ? "+use_intrinsic_guess" : "",
flags & CALIB_FIX_ASPECT_RATIO ? "+fix_aspectRatio" : "",
flags & CALIB_FIX_PRINCIPAL_POINT ? "+fix_principal_point" : "",
flags & CALIB_ZERO_TANGENT_DIST ? "+zero_tangent_dist" : "");
}
fs << "flags" << flags;
fs << "camera_matrix" << cameraMatrix;
fs << "distortion_coefficients" << distCoeffs;
fs << "avg_reprojection_error" << totalAvgErr;
return true;
}
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -162,7 +90,6 @@ int main(int argc, char *argv[]) {
int squaresY = parser.get<int>("h");
float squareLength = parser.get<float>("sl");
float markerLength = parser.get<float>("ml");
int dictionaryId = parser.get<int>("d");
string outputFile = parser.get<string>(0);
bool showChessboardCorners = parser.get<bool>("sc");
@@ -176,9 +103,10 @@ int main(int argc, char *argv[]) {
if(parser.get<bool>("zt")) calibrationFlags |= CALIB_ZERO_TANGENT_DIST;
if(parser.get<bool>("pc")) calibrationFlags |= CALIB_FIX_PRINCIPAL_POINT;
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
@@ -208,8 +136,23 @@ int main(int argc, char *argv[]) {
waitTime = 10;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
cerr << "Invalid dictionary file" << endl;
return 0;
}
}
else {
cerr << "Dictionary not specified" << endl;
return 0;
}
// create charuco board object
Ptr<aruco::CharucoBoard> charucoboard =

@@ -39,6 +39,8 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/highgui.hpp>
#include <opencv2/aruco.hpp>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace cv;
@@ -54,11 +56,13 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{m | | Margins size (in pixels). Default is marker separation (-s) }"
"{bb | 1 | Number of bits in marker borders }"
"{si | false | show generated image }";
}
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -72,7 +76,6 @@ int main(int argc, char *argv[]) {
int markersY = parser.get<int>("h");
int markerLength = parser.get<int>("l");
int markerSeparation = parser.get<int>("s");
int dictionaryId = parser.get<int>("d");
int margins = markerSeparation;
if(parser.has("m")) {
margins = parser.get<int>("m");
@@ -93,8 +96,24 @@ int main(int argc, char *argv[]) {
imageSize.height =
markersY * (markerLength + markerSeparation) - markerSeparation + 2 * margins;
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk)
{
std::cerr << "Invalid dictionary file" << std::endl;
return 0;
}
}
else {
std::cerr << "Dictionary not specified" << std::endl;
return 0;
}
Ptr<aruco::GridBoard> board = aruco::GridBoard::create(markersX, markersY, float(markerLength),
float(markerSeparation), dictionary);

@@ -39,11 +39,14 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/highgui.hpp>
#include <opencv2/aruco/charuco.hpp>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace cv;
namespace {
const char* about = "Create a ChArUco board image";
//! [charuco_detect_board_keys]
const char* keys =
"{@outfile |<none> | Output image }"
"{w | | Number of squares in X direction }"
@@ -54,10 +57,13 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{m | | Margins size (in pixels). Default is (squareLength-markerLength) }"
"{bb | 1 | Number of bits in marker borders }"
"{si | false | show generated image }";
}
//! [charuco_detect_board_keys]
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
@@ -72,7 +78,6 @@ int main(int argc, char *argv[]) {
int squaresY = parser.get<int>("h");
int squareLength = parser.get<int>("sl");
int markerLength = parser.get<int>("ml");
int dictionaryId = parser.get<int>("d");
int margins = squareLength - markerLength;
if(parser.has("m")) {
margins = parser.get<int>("m");
@@ -88,8 +93,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
std::cerr << "Invalid dictionary file" << std::endl;
return 0;
}
}
else {
std::cerr << "Dictionary not specified" << std::endl;
return 0;
}
Size imageSize;
imageSize.width = squaresX * squareLength + 2 * margins;

@@ -39,22 +39,28 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/highgui.hpp>
#include <opencv2/aruco.hpp>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace cv;
namespace {
const char* about = "Create an ArUco marker image";
//! [aruco_create_markers_keys]
const char* keys =
"{@outfile |<none> | Output image }"
"{d | | dictionary: DICT_4X4_50=0, DICT_4X4_100=1, DICT_4X4_250=2,"
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{id | | Marker id in the dictionary }"
"{ms | 200 | Marker size in pixels }"
"{bb | 1 | Number of bits in marker borders }"
"{si | false | show generated image }";
}
//! [aruco_create_markers_keys]
int main(int argc, char *argv[]) {
@@ -66,7 +72,6 @@ int main(int argc, char *argv[]) {
return 0;
}
int dictionaryId = parser.get<int>("d");
int markerId = parser.get<int>("id");
int borderBits = parser.get<int>("bb");
int markerSize = parser.get<int>("ms");
@@ -79,8 +84,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
std::cerr << "Invalid dictionary file" << std::endl;
return 0;
}
}
else {
std::cerr << "Dictionary not specified" << std::endl;
return 0;
}
Mat markerImg;
aruco::drawMarker(dictionary, markerId, markerSize, markerImg, borderBits);

@@ -41,12 +41,15 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/aruco.hpp>
#include <vector>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
namespace {
const char* about = "Pose estimation using a ArUco Planar Grid board";
//! [aruco_detect_board_keys]
const char* keys =
"{w | | Number of squares in X direction }"
"{h | | Number of squares in Y direction }"
@@ -56,59 +59,17 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{c | | Output file with calibrated camera parameters }"
"{v | | Input from video file, if ommited, input comes from camera }"
"{v | | Input from video or image file, if omitted, input comes from camera }"
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
"{dp | | File of marker detector parameters }"
"{rs | | Apply refind strategy }"
"{r | | show rejected candidates too }";
}
/**
*/
static bool readCameraParameters(string filename, Mat &camMatrix, Mat &distCoeffs) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return true;
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
//! [aruco_detect_board_keys]
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -122,7 +83,6 @@ int main(int argc, char *argv[]) {
int markersY = parser.get<int>("h");
float markerLength = parser.get<float>("l");
float markerSeparation = parser.get<float>("s");
int dictionaryId = parser.get<int>("d");
bool showRejected = parser.has("r");
bool refindStrategy = parser.has("rs");
int camId = parser.get<int>("ci");
@@ -137,9 +97,10 @@ int main(int argc, char *argv[]) {
}
}
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
@@ -157,8 +118,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
cerr << "Invalid dictionary file" << endl;
return 0;
}
}
else {
cerr << "Dictionary not specified" << endl;
return 0;
}
VideoCapture inputVideo;
int waitTime;

@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/aruco/charuco.hpp>
#include <vector>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
@@ -57,58 +58,16 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{c | | Output file with calibrated camera parameters }"
"{v | | Input from video file, if ommited, input comes from camera }"
"{v | | Input from video or image file, if ommited, input comes from camera }"
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
"{dp | | File of marker detector parameters }"
"{rs | | Apply refind strategy }"
"{r | | show rejected candidates too }";
}
/**
*/
static bool readCameraParameters(string filename, Mat &camMatrix, Mat &distCoeffs) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return true;
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -122,7 +81,6 @@ int main(int argc, char *argv[]) {
int squaresY = parser.get<int>("h");
float squareLength = parser.get<float>("sl");
float markerLength = parser.get<float>("ml");
int dictionaryId = parser.get<int>("d");
bool showRejected = parser.has("r");
bool refindStrategy = parser.has("rs");
int camId = parser.get<int>("ci");
@@ -141,9 +99,10 @@ int main(int argc, char *argv[]) {
}
}
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
@@ -155,8 +114,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
cerr << "Invalid dictionary file" << endl;
return 0;
}
}
else {
cerr << "Dictionary not specified" << endl;
return 0;
}
VideoCapture inputVideo;
int waitTime;

@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/aruco/charuco.hpp>
#include <vector>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
@@ -55,6 +56,7 @@ const char* keys =
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}"
"{cd | | Input file with custom dictionary }"
"{c | | Output file with calibrated camera parameters }"
"{as | | Automatic scale. The provided number is multiplied by the last"
"diamond id becoming an indicator of the square length. In this case, the -sl and "
@@ -63,53 +65,12 @@ const char* keys =
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
"{dp | | File of marker detector parameters }"
"{rs | | Apply refind strategy }"
"{refine | | Corner refinement: CORNER_REFINE_NONE=0, CORNER_REFINE_SUBPIX=1,"
"CORNER_REFINE_CONTOUR=2, CORNER_REFINE_APRILTAG=3}"
"{r | | show rejected candidates too }";
}
/**
*/
static bool readCameraParameters(string filename, Mat &camMatrix, Mat &distCoeffs) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return true;
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -121,20 +82,25 @@ int main(int argc, char *argv[]) {
float squareLength = parser.get<float>("sl");
float markerLength = parser.get<float>("ml");
int dictionaryId = parser.get<int>("d");
bool showRejected = parser.has("r");
bool estimatePose = parser.has("c");
bool autoScale = parser.has("as");
float autoScaleFactor = autoScale ? parser.get<float>("as") : 1.f;
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
}
}
if (parser.has("refine")) {
//override cornerRefinementMethod read from config file
detectorParams->cornerRefinementMethod = parser.get<int>("refine");
}
std::cout << "Corner refinement method (0: None, 1: Subpixel, 2:contour, 3: AprilTag 2): " << detectorParams->cornerRefinementMethod << std::endl;
int camId = parser.get<int>("ci");
String video;
@@ -148,8 +114,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
cerr << "Invalid dictionary file" << endl;
return 0;
}
}
else {
cerr << "Dictionary not specified" << endl;
return 0;
}
Mat camMatrix, distCoeffs;
if(estimatePose) {

@@ -40,19 +40,23 @@ the use of this software, even if advised of the possibility of such damage.
#include <opencv2/highgui.hpp>
#include <opencv2/aruco.hpp>
#include <iostream>
#include "aruco_samples_utility.hpp"
using namespace std;
using namespace cv;
namespace {
const char* about = "Basic marker detection";
//! [aruco_detect_markers_keys]
const char* keys =
"{d | | dictionary: DICT_4X4_50=0, DICT_4X4_100=1, DICT_4X4_250=2,"
"DICT_4X4_1000=3, DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, "
"DICT_6X6_50=8, DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12,"
"DICT_7X7_100=13, DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16,"
"DICT_APRILTAG_16h5=17, DICT_APRILTAG_25h9=18, DICT_APRILTAG_36h10=19, DICT_APRILTAG_36h11=20}"
"{v | | Input from video file, if ommited, input comes from camera }"
"{cd | | Input file with custom dictionary }"
"{v | | Input from video or image file, if ommited, input comes from camera }"
"{ci | 0 | Camera id if input doesnt come from video (-v) }"
"{c | | Camera intrinsic parameters. Needed for camera pose }"
"{l | 0.1 | Marker side length (in meters). Needed for correct scale in camera pose }"
@@ -61,53 +65,8 @@ const char* keys =
"{refine | | Corner refinement: CORNER_REFINE_NONE=0, CORNER_REFINE_SUBPIX=1,"
"CORNER_REFINE_CONTOUR=2, CORNER_REFINE_APRILTAG=3}";
}
//! [aruco_detect_markers_keys]
/**
*/
static bool readCameraParameters(string filename, Mat &camMatrix, Mat &distCoeffs) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return true;
}
/**
*/
static bool readDetectorParameters(string filename, Ptr<aruco::DetectorParameters> &params) {
FileStorage fs(filename, FileStorage::READ);
if(!fs.isOpened())
return false;
fs["adaptiveThreshWinSizeMin"] >> params->adaptiveThreshWinSizeMin;
fs["adaptiveThreshWinSizeMax"] >> params->adaptiveThreshWinSizeMax;
fs["adaptiveThreshWinSizeStep"] >> params->adaptiveThreshWinSizeStep;
fs["adaptiveThreshConstant"] >> params->adaptiveThreshConstant;
fs["minMarkerPerimeterRate"] >> params->minMarkerPerimeterRate;
fs["maxMarkerPerimeterRate"] >> params->maxMarkerPerimeterRate;
fs["polygonalApproxAccuracyRate"] >> params->polygonalApproxAccuracyRate;
fs["minCornerDistanceRate"] >> params->minCornerDistanceRate;
fs["minDistanceToBorder"] >> params->minDistanceToBorder;
fs["minMarkerDistanceRate"] >> params->minMarkerDistanceRate;
fs["cornerRefinementMethod"] >> params->cornerRefinementMethod;
fs["cornerRefinementWinSize"] >> params->cornerRefinementWinSize;
fs["cornerRefinementMaxIterations"] >> params->cornerRefinementMaxIterations;
fs["cornerRefinementMinAccuracy"] >> params->cornerRefinementMinAccuracy;
fs["markerBorderBits"] >> params->markerBorderBits;
fs["perspectiveRemovePixelPerCell"] >> params->perspectiveRemovePixelPerCell;
fs["perspectiveRemoveIgnoredMarginPerCell"] >> params->perspectiveRemoveIgnoredMarginPerCell;
fs["maxErroneousBitsInBorderRate"] >> params->maxErroneousBitsInBorderRate;
fs["minOtsuStdDev"] >> params->minOtsuStdDev;
fs["errorCorrectionRate"] >> params->errorCorrectionRate;
return true;
}
/**
*/
int main(int argc, char *argv[]) {
CommandLineParser parser(argc, argv, keys);
parser.about(about);
@@ -117,14 +76,14 @@ int main(int argc, char *argv[]) {
return 0;
}
int dictionaryId = parser.get<int>("d");
bool showRejected = parser.has("r");
bool estimatePose = parser.has("c");
float markerLength = parser.get<float>("l");
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
Ptr<aruco::DetectorParameters> detectorParams;
if(parser.has("dp")) {
bool readOk = readDetectorParameters(parser.get<string>("dp"), detectorParams);
FileStorage fs(parser.get<string>("dp"), FileStorage::READ);
bool readOk = aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
if(!readOk) {
cerr << "Invalid detector parameters file" << endl;
return 0;
@@ -149,8 +108,23 @@ int main(int argc, char *argv[]) {
return 0;
}
Ptr<aruco::Dictionary> dictionary =
aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
Ptr<aruco::Dictionary> dictionary;
if (parser.has("d")) {
int dictionaryId = parser.get<int>("d");
dictionary = aruco::getPredefinedDictionary(aruco::PREDEFINED_DICTIONARY_NAME(dictionaryId));
}
else if (parser.has("cd")) {
FileStorage fs(parser.get<std::string>("cd"), FileStorage::READ);
bool readOk = aruco::Dictionary::readDictionary(fs.root(), dictionary);
if(!readOk) {
std::cerr << "Invalid dictionary file" << std::endl;
return 0;
}
}
else {
std::cerr << "Dictionary not specified" << std::endl;
return 0;
}
Mat camMatrix, distCoeffs;
if(estimatePose) {

@@ -1,5 +1,4 @@
%YAML:1.0
nmarkers: 1024
adaptiveThreshWinSizeMin: 3
adaptiveThreshWinSizeMax: 23
adaptiveThreshWinSizeStep: 10

@@ -0,0 +1,21 @@
%YAML:1.0
---
calibration_time: "Wed 08 Dec 2021 05:13:09 PM MSK"
image_width: 640
image_height: 480
flags: 0
camera_matrix: !!opencv-matrix
rows: 3
cols: 3
dt: d
data: [ 4.5251072219637672e+02, 0., 3.1770297317353277e+02, 0.,
4.5676707935146891e+02, 2.7775155919135995e+02, 0., 0., 1. ]
distortion_coefficients: !!opencv-matrix
rows: 1
cols: 5
dt: d
data: [ 1.2136925618707872e-01, -1.0854664722560681e+00,
1.1786843796668460e-04, -4.6240686046485508e-04,
2.9542589406810080e+00 ]
avg_reprojection_error: 1.8234905535936044e-01
info: "The camera calibration parameters were obtained by img_00.jpg-img_03.jpg from aruco/tutorials/aruco_calibration/images"

@@ -0,0 +1,14 @@
%YAML:1.0
camera_matrix: !!opencv-matrix
rows: 3
cols: 3
dt: d
data: [ 628.158, 0., 324.099,
0., 628.156, 260.908,
0., 0., 1. ]
distortion_coefficients: !!opencv-matrix
rows: 5
cols: 1
dt: d
data: [ 0.0995485, -0.206384,
0.00754589, 0.00336531, 0 ]

@@ -4,27 +4,14 @@
#include <opencv2/highgui.hpp>
#include <iostream>
#include <string>
#include "aruco_samples_utility.hpp"
namespace {
const char* about = "A tutorial code on charuco board creation and detection of charuco board with and without camera caliberation";
const char* keys = "{c | | Put value of c=1 to create charuco board;\nc=2 to detect charuco board without camera calibration;\nc=3 to detect charuco board with camera calibration and Pose Estimation}";
}
void createBoard();
void detectCharucoBoardWithCalibrationPose();
void detectCharucoBoardWithoutCalibration();
static bool readCameraParameters(std::string filename, cv::Mat& camMatrix, cv::Mat& distCoeffs)
{
cv::FileStorage fs(filename, cv::FileStorage::READ);
if (!fs.isOpened())
return false;
fs["camera_matrix"] >> camMatrix;
fs["distortion_coefficients"] >> distCoeffs;
return (camMatrix.size() == cv::Size(3,3)) ;
}
void createBoard()
static inline void createBoard()
{
cv::Ptr<cv::aruco::Dictionary> dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_6X6_250);
//! [createBoard]
@@ -36,7 +23,7 @@ void createBoard()
}
//! [detwcp]
void detectCharucoBoardWithCalibrationPose()
static inline void detectCharucoBoardWithCalibrationPose()
{
cv::VideoCapture inputVideo;
inputVideo.open(0);
@@ -81,9 +68,8 @@ void detectCharucoBoardWithCalibrationPose()
//! [detcor]
cv::Vec3d rvec, tvec;
//! [pose]
// cv::aruco::estimatePoseCharucoBoard(charucoCorners, charucoIds, board, cameraMatrix, distCoeffs, rvec, tvec);
//! [pose]
bool valid = cv::aruco::estimatePoseCharucoBoard(charucoCorners, charucoIds, board, cameraMatrix, distCoeffs, rvec, tvec);
//! [pose]
// if charuco pose is valid
if (valid)
cv::aruco::drawAxis(imageCopy, cameraMatrix, distCoeffs, rvec, tvec, 0.1f);
@@ -99,7 +85,7 @@ void detectCharucoBoardWithCalibrationPose()
//! [detwcp]
//! [detwc]
void detectCharucoBoardWithoutCalibration()
static inline void detectCharucoBoardWithoutCalibration()
{
cv::VideoCapture inputVideo;
inputVideo.open(0);

@@ -0,0 +1,38 @@
%YAML:1.0
nmarkers: 35
markersize: 6
marker_0: "101011111011111001001001101100000000"
marker_1: "000000000010011001010011111010111000"
marker_2: "011001100000001010000101111101001101"
marker_3: "001000111111000111011001110000011111"
marker_4: "100110110100101111000000111101110011"
marker_5: "010101101110111000111010111100010111"
marker_6: "101001000110011110101001010100110100"
marker_7: "011010100100110000011101110110100010"
marker_8: "111110001000101000110001010010111101"
marker_9: "011101101100110111001100100001010100"
marker_10: "100001100001010001110001011000000111"
marker_11: "110010010010011100101111111000001111"
marker_12: "110101001001010110011111010110001101"
marker_13: "001111000001000100010001101001010001"
marker_14: "000000010010101010111110110011010011"
marker_15: "110001110111100101110011111100111010"
marker_16: "101011001110001010110011111011001110"
marker_17: "101110111101110100101101011001010111"
marker_18: "000100111000111101010011010101000101"
marker_19: "001110001110001101100101110100000011"
marker_20: "100101101100010110110110110001100011"
marker_21: "010110001001011010000100111000110110"
marker_22: "001000000000100100000000010100010010"
marker_23: "101001110010100110000111111010010000"
marker_24: "111001101010001100011010010001011100"
marker_25: "101000010001010000110100111101101001"
marker_26: "101010000001010011001010110110000001"
marker_27: "100101001000010101001000111101111110"
marker_28: "010010100110010011110001110101011100"
marker_29: "011001000101100001101111010001001111"
marker_30: "000111011100011110001101111011011001"
marker_31: "010100001011000100111101110001101010"
marker_32: "100101101001101010111111101101110100"
marker_33: "101101001010111000000100110111010101"
marker_34: "011111010000111011111110110101100101"
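Each marker_N entry is the marker's bit pattern flattened row by row into a '0'/'1' string of markersize * markersize characters; Dictionary::readDictionary (in the dictionary.cpp hunk below) decodes it with a single loop. A sketch of that decoding for marker_0:

    // Rebuild marker_0 above as a 6x6 CV_8UC1 matrix, row-major,
    // mirroring the loop in Dictionary::readDictionary.
    std::string bits = "101011111011111001001001101100000000";
    const int markerSize = 6; // the markersize field
    cv::Mat marker(markerSize, markerSize, CV_8UC1);
    for (int j = 0; j < (int)bits.size(); j++)
        marker.at<unsigned char>(j) = (bits[j] == '0') ? 0 : 1;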

@@ -100,6 +100,48 @@ Ptr<DetectorParameters> DetectorParameters::create() {
return params;
}
template<typename T>
static inline bool readParameter(const FileNode& node, T& parameter)
{
if (!node.empty()) {
node >> parameter;
return true;
}
return false;
}
/**
* @brief Read a new set of DetectorParameters from FileStorage.
*/
bool DetectorParameters::readDetectorParameters(const FileNode& fn, Ptr<DetectorParameters>& params)
{
if(fn.empty())
return true;
params = DetectorParameters::create();
bool checkRead = false;
checkRead |= readParameter(fn["adaptiveThreshWinSizeMin"], params->adaptiveThreshWinSizeMin);
checkRead |= readParameter(fn["adaptiveThreshWinSizeMax"], params->adaptiveThreshWinSizeMax);
checkRead |= readParameter(fn["adaptiveThreshWinSizeStep"], params->adaptiveThreshWinSizeStep);
checkRead |= readParameter(fn["adaptiveThreshConstant"], params->adaptiveThreshConstant);
checkRead |= readParameter(fn["minMarkerPerimeterRate"], params->minMarkerPerimeterRate);
checkRead |= readParameter(fn["maxMarkerPerimeterRate"], params->maxMarkerPerimeterRate);
checkRead |= readParameter(fn["polygonalApproxAccuracyRate"], params->polygonalApproxAccuracyRate);
checkRead |= readParameter(fn["minCornerDistanceRate"], params->minCornerDistanceRate);
checkRead |= readParameter(fn["minDistanceToBorder"], params->minDistanceToBorder);
checkRead |= readParameter(fn["minMarkerDistanceRate"], params->minMarkerDistanceRate);
checkRead |= readParameter(fn["cornerRefinementMethod"], params->cornerRefinementMethod);
checkRead |= readParameter(fn["cornerRefinementWinSize"], params->cornerRefinementWinSize);
checkRead |= readParameter(fn["cornerRefinementMaxIterations"], params->cornerRefinementMaxIterations);
checkRead |= readParameter(fn["cornerRefinementMinAccuracy"], params->cornerRefinementMinAccuracy);
checkRead |= readParameter(fn["markerBorderBits"], params->markerBorderBits);
checkRead |= readParameter(fn["perspectiveRemovePixelPerCell"], params->perspectiveRemovePixelPerCell);
checkRead |= readParameter(fn["perspectiveRemoveIgnoredMarginPerCell"], params->perspectiveRemoveIgnoredMarginPerCell);
checkRead |= readParameter(fn["maxErroneousBitsInBorderRate"], params->maxErroneousBitsInBorderRate);
checkRead |= readParameter(fn["minOtsuStdDev"], params->minOtsuStdDev);
checkRead |= readParameter(fn["errorCorrectionRate"], params->errorCorrectionRate);
return checkRead;
}
/**
* @brief Convert input image to gray if it is a 3-channels image
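Worth noting about the accumulation in readDetectorParameters above: params is seeded with DetectorParameters::create() defaults, readParameter leaves its output untouched when a key is absent, and the ORed checkRead means the function reports success as soon as any known key is present. A small sketch of that contract (values illustrative):

    cv::FileNode absent;            // default-constructed, empty node
    int winSizeMin = 3;             // default set by DetectorParameters::create()
    bool found = readParameter(absent, winSizeMin);
    // found == false and winSizeMin is still 3: missing keys keep their defaults.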

@@ -84,6 +84,36 @@ Ptr<Dictionary> Dictionary::create(int nMarkers, int markerSize,
return generateCustomDictionary(nMarkers, markerSize, baseDictionary, randomSeed);
}
template<typename T>
static inline bool readParameter(const FileNode& node, T& parameter)
{
if (!node.empty()) {
node >> parameter;
return true;
}
return false;
}
bool Dictionary::readDictionary(const cv::FileNode& fn, cv::Ptr<cv::aruco::Dictionary> &dictionary)
{
int nMarkers = 0, markerSize = 0;
if(fn.empty() || !readParameter(fn["nmarkers"], nMarkers) || !readParameter(fn["markersize"], markerSize))
return false;
cv::Mat bytes(0, 0, CV_8UC1), marker(markerSize, markerSize, CV_8UC1);
std::string markerString;
for (int i = 0; i < nMarkers; i++) {
std::ostringstream ostr;
ostr << i;
if (!readParameter(fn["marker_" + ostr.str()], markerString))
return false;
for (int j = 0; j < (int) markerString.size(); j++)
marker.at<unsigned char>(j) = (markerString[j] == '0') ? 0 : 1;
bytes.push_back(cv::aruco::Dictionary::getByteListFromBits(marker));
}
dictionary = cv::makePtr<cv::aruco::Dictionary>(bytes, markerSize);
return true;
}
/**
*/

@@ -36,8 +36,8 @@ or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
#include "test_precomp.hpp"
#include <opencv2/core/utils/logger.defines.hpp>
namespace opencv_test { namespace {
@ -554,4 +554,94 @@ TEST(CV_ArucoBitCorrection, algorithmic) {
test.safe_run();
}
TEST(CV_ArucoTutorial, can_find_singlemarkersoriginal)
{
string img_path = cvtest::findDataFile("singlemarkersoriginal.jpg", false);
Mat image = imread(img_path);
Ptr<aruco::Dictionary> dictionary = aruco::getPredefinedDictionary(aruco::DICT_6X6_250);
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
vector< int > ids;
vector< vector< Point2f > > corners, rejected;
const size_t N = 6ull;
// corners of the ArUco markers whose IDs are listed in goldCornersIds
const int goldCorners[N][8] = { {359,310, 404,310, 410,350, 362,350}, {427,255, 469,256, 477,289, 434,288},
{233,273, 190,273, 196,241, 237,241}, {298,185, 334,186, 335,212, 297,211},
{425,163, 430,186, 394,186, 390,162}, {195,155, 230,155, 227,178, 190,178} };
const int goldCornersIds[N] = { 40, 98, 62, 23, 124, 203};
map<int, const int*> mapGoldCorners;
for (size_t i = 0; i < N; i++)
mapGoldCorners[goldCornersIds[i]] = goldCorners[i];
aruco::detectMarkers(image, dictionary, corners, ids, detectorParams, rejected);
ASSERT_EQ(N, ids.size());
for (size_t i = 0; i < N; i++)
{
int arucoId = ids[i];
ASSERT_EQ(4ull, corners[i].size());
ASSERT_TRUE(mapGoldCorners.find(arucoId) != mapGoldCorners.end());
for (int j = 0; j < 4; j++)
{
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2]), corners[i][j].x, 1.f);
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2 + 1]), corners[i][j].y, 1.f);
}
}
}
TEST(CV_ArucoTutorial, can_find_gboriginal)
{
string imgPath = cvtest::findDataFile("gboriginal.png", false);
Mat image = imread(imgPath);
string dictPath = cvtest::findDataFile("tutorial_dict.yml", false);
cv::Ptr<cv::aruco::Dictionary> dictionary;
FileStorage fs(dictPath, FileStorage::READ);
aruco::Dictionary::readDictionary(fs.root(), dictionary); // load the custom dictionary from tutorial_dict.yml
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
vector< int > ids;
vector< vector< Point2f > > corners, rejected;
const size_t N = 35ull;
// corners of ArUco markers with indices 0, 1, ..., 34
const int goldCorners[N][8] = { {252,74, 286,81, 274,102, 238,95}, {295,82, 330,89, 319,111, 282,104},
{338,91, 375,99, 365,121, 327,113}, {383,100, 421,107, 412,130, 374,123},
{429,109, 468,116, 461,139, 421,132}, {235,100, 270,108, 257,130, 220,122},
{279,109, 316,117, 304,140, 266,133}, {324,119, 362,126, 352,150, 313,143},
{371,128, 410,136, 400,161, 360,152}, {418,139, 459,145, 451,170, 410,163},
{216,128, 253,136, 239,161, 200,152}, {262,138, 300,146, 287,172, 248,164},
{309,148, 349,156, 337,183, 296,174}, {358,158, 398,167, 388,194, 346,185},
{407,169, 449,176, 440,205, 397,196}, {196,158, 235,168, 218,195, 179,185},
{243,170, 283,178, 269,206, 228,197}, {293,180, 334,190, 321,218, 279,209},
{343,192, 385,200, 374,230, 330,220}, {395,203, 438,211, 429,241, 384,233},
{174,192, 215,201, 197,231, 156,221}, {223,204, 265,213, 249,244, 207,234},
{275,215, 317,225, 303,257, 259,246}, {327,227, 371,238, 359,270, 313,259},
{381,240, 426,249, 416,282, 369,273}, {151,228, 193,238, 173,271, 130,260},
{202,241, 245,251, 228,285, 183,274}, {255,254, 300,264, 284,299, 238,288},
{310,267, 355,278, 342,314, 295,302}, {366,281, 413,290, 402,327, 353,317},
{125,267, 168,278, 147,314, 102,303}, {178,281, 223,293, 204,330, 157,317},
{233,296, 280,307, 263,346, 214,333}, {291,310, 338,322, 323,363, 274,349},
{349,325, 399,336, 386,378, 335,366} };
map<int, const int*> mapGoldCorners;
for (int i = 0; i < static_cast<int>(N); i++)
mapGoldCorners[i] = goldCorners[i];
aruco::detectMarkers(image, dictionary, corners, ids, detectorParams, rejected);
ASSERT_EQ(N, ids.size());
for (size_t i = 0; i < N; i++)
{
int arucoId = ids[i];
ASSERT_EQ(4ull, corners[i].size());
ASSERT_TRUE(mapGoldCorners.find(arucoId) != mapGoldCorners.end());
for (int j = 0; j < 4; j++)
{
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j*2]), corners[i][j].x, 1.f);
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j*2+1]), corners[i][j].y, 1.f);
}
}
}
}} // namespace

@ -733,4 +733,120 @@ TEST(Charuco, testBoardSubpixelCoords)
EXPECT_NEAR(0, cvtest::norm(expected_corners, c_corners.reshape(1), NORM_INF), 1e-3);
}
TEST(CV_ArucoTutorial, can_find_choriginal)
{
string imgPath = cvtest::findDataFile("choriginal.jpg", false);
Mat image = imread(imgPath);
cv::Ptr<cv::aruco::Dictionary> dictionary = aruco::getPredefinedDictionary(aruco::DICT_6X6_250);
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
vector< int > ids;
vector< vector< Point2f > > corners, rejected;
const size_t N = 17ull;
// corners of the ArUco markers with IDs 0, 1, ..., 16
const int goldCorners[N][8] = { {268,77, 290,80, 286,97, 263,94}, {360,90, 382,93, 379,111, 357,108},
{211,106, 233,109, 228,127, 205,123}, {306,120, 328,124, 325,142, 302,138},
{402,135, 425,139, 423,157, 400,154}, {247,152, 271,155, 267,174, 242,171},
{347,167, 371,171, 369,191, 344,187}, {185,185, 209,189, 203,210, 178,206},
{288,201, 313,206, 309,227, 284,223}, {393,218, 418,222, 416,245, 391,241},
{223,240, 250,244, 244,268, 217,263}, {333,258, 359,262, 356,286, 329,282},
{152,281, 179,285, 171,312, 143,307}, {267,300, 294,305, 289,331, 261,327},
{383,319, 410,324, 408,351, 380,347}, {194,347, 223,352, 216,382, 186,377},
{315,368, 345,373, 341,403, 310,398} };
map<int, const int*> mapGoldCorners;
for (int i = 0; i < static_cast<int>(N); i++)
mapGoldCorners[i] = goldCorners[i];
aruco::detectMarkers(image, dictionary, corners, ids, detectorParams, rejected);
ASSERT_EQ(N, ids.size());
for (size_t i = 0; i < N; i++)
{
int arucoId = ids[i];
ASSERT_EQ(4ull, corners[i].size());
ASSERT_TRUE(mapGoldCorners.find(arucoId) != mapGoldCorners.end());
for (int j = 0; j < 4; j++)
{
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2]), corners[i][j].x, 1.f);
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2 + 1]), corners[i][j].y, 1.f);
}
}
}
TEST(CV_ArucoTutorial, can_find_chocclusion)
{
string imgPath = cvtest::findDataFile("chocclusion_original.jpg", false);
Mat image = imread(imgPath);
cv::Ptr<cv::aruco::Dictionary> dictionary = aruco::getPredefinedDictionary(aruco::DICT_6X6_250);
Ptr<aruco::DetectorParameters> detectorParams = aruco::DetectorParameters::create();
vector< int > ids;
vector< vector< Point2f > > corners, rejected;
const size_t N = 13ull;
// corners of the ArUco markers whose IDs are listed in goldCornersIds
const int goldCorners[N][8] = { {301,57, 322,62, 317,79, 295,73}, {391,80, 413,85, 408,103, 386,97},
{242,79, 264,85, 256,102, 234,96}, {334,103, 357,109, 352,126, 329,121},
{428,129, 451,134, 448,152, 425,146}, {274,128, 296,134, 290,153, 266,147},
{371,154, 394,160, 390,180, 366,174}, {208,155, 232,161, 223,181, 199,175},
{309,182, 333,188, 327,209, 302,203}, {411,210, 436,216, 432,238, 407,231},
{241,212, 267,219, 258,242, 232,235}, {167,244, 194,252, 183,277, 156,269},
{202,314, 230,322, 220,349, 191,341} };
map<int, const int*> mapGoldCorners;
const int goldCornersIds[N] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15};
for (int i = 0; i < static_cast<int>(N); i++)
mapGoldCorners[goldCornersIds[i]] = goldCorners[i];
aruco::detectMarkers(image, dictionary, corners, ids, detectorParams, rejected);
ASSERT_EQ(N, ids.size());
for (size_t i = 0; i < N; i++)
{
int arucoId = ids[i];
ASSERT_EQ(4ull, corners[i].size());
ASSERT_TRUE(mapGoldCorners.find(arucoId) != mapGoldCorners.end());
for (int j = 0; j < 4; j++)
{
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2]), corners[i][j].x, 1.f);
EXPECT_NEAR(static_cast<float>(mapGoldCorners[arucoId][j * 2 + 1]), corners[i][j].y, 1.f);
}
}
}
TEST(CV_ArucoTutorial, can_find_diamondmarkers)
{
string imgPath = cvtest::findDataFile("diamondmarkers.png", false);
Mat image = imread(imgPath);
string dictPath = cvtest::findDataFile("tutorial_dict.yml", false);
cv::Ptr<cv::aruco::Dictionary> dictionary;
FileStorage fs(dictPath, FileStorage::READ);
aruco::Dictionary::readDictionary(fs.root(), dictionary); // load the custom dictionary from tutorial_dict.yml
string detectorPath = cvtest::findDataFile("detector_params.yml", false);
fs = FileStorage(detectorPath, FileStorage::READ);
Ptr<aruco::DetectorParameters> detectorParams;
aruco::DetectorParameters::readDetectorParameters(fs.root(), detectorParams);
detectorParams->cornerRefinementMethod = 3; // aruco::CORNER_REFINE_APRILTAG
vector< int > ids;
vector< vector< Point2f > > corners, rejected;
const size_t N = 12ull;
// expected IDs (with repetitions) of the detected ArUco markers
const int goldCornersIds[N] = { 4, 12, 11, 3, 12, 10, 12, 10, 10, 11, 2, 11 };
map<int, int> counterGoldCornersIds;
for (int i = 0; i < static_cast<int>(N); i++)
counterGoldCornersIds[goldCornersIds[i]]++;
aruco::detectMarkers(image, dictionary, corners, ids, detectorParams, rejected);
ASSERT_EQ(N, ids.size());
map<int, int> counterRes;
for (size_t i = 0; i < N; i++)
{
int arucoId = ids[i];
counterRes[arucoId]++;
}
EXPECT_EQ(counterGoldCornersIds, counterRes); // check the detected marker IDs and their counts
}
}} // namespace

@ -3,4 +3,6 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
CV_TEST_MAIN("cv")
CV_TEST_MAIN("cv",
cvtest::addDataSearchSubDirectory("contrib/aruco")
)

@ -1,6 +1,9 @@
Detection of ArUco Boards {#tutorial_aruco_board_detection}
==============================
@prev_tutorial{tutorial_aruco_detection}
@next_tutorial{tutorial_charuco_detection}
An ArUco Board is a set of markers that acts like a single marker in the sense that it provides a
single pose for the camera.
@ -55,7 +58,8 @@ The aruco module provides a specific function, ```estimatePoseBoard()```, to per
cv::Mat inputImage;
// camera parameters are read from somewhere
cv::Mat cameraMatrix, distCoeffs;
readCameraParameters(cameraMatrix, distCoeffs);
// You can read camera parameters from tutorial_camera_params.yml
readCameraParameters(filename, cameraMatrix, distCoeffs); // This function is located in detect_board.cpp
// assume we have a function to create the board object
cv::Ptr<cv::aruco::Board> board = cv::aruco::Board::create();
...
@ -153,11 +157,11 @@ The output image will be something like this:
![](images/board.jpg)
A full working example of board creation is included in the ```create_board.cpp``` inside the module samples folder.
A full working example of board creation is included in `create_board.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
"_output path_/aboard.png" -w=5 -h=7 -l=100 -s=10 -d=10
"_output_path_/aboard.png" -w=5 -h=7 -l=100 -s=10 -d=10
@endcode
Finally, a full example of board detection:
@ -167,10 +171,12 @@ Finally, a full example of board detection:
inputVideo.open(0);
cv::Mat cameraMatrix, distCoeffs;
// camera parameters are read from somewhere
readCameraParameters(cameraMatrix, distCoeffs);
// You can read camera parameters from tutorial_camera_params.yml
readCameraParameters(filename, cameraMatrix, distCoeffs); // This function is located in detect_board.cpp
cv::Ptr<cv::aruco::Dictionary> dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_6X6_250);
// To use the tutorial sample, you need to read the custom dictionary from tutorial_dict.yml
readDictionary(filename, dictionary); // This function is located in detect_board.cpp
cv::Ptr<cv::aruco::GridBoard> board = cv::aruco::GridBoard::create(5, 7, 0.04, 0.01, dictionary);
while (inputVideo.grab()) {
@ -207,14 +213,20 @@ Sample video:
<iframe width="420" height="315" src="https://www.youtube.com/embed/Q1HlJEjW_j0" frameborder="0" allowfullscreen></iframe>
@endhtmlonly
A full working example is included in the ```detect_board.cpp``` inside the module samples folder.
A full working example is included in `detect_board.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
-c="_path_"/calib.txt" "_path_/aboard.png" -w=5 -h=7 -l=100 -s=10 -d=10
-w=5 -h=7 -l=100 -s=10
-v=/path_to_aruco_tutorials/aruco_board_detection/images/gboriginal.png
-c=/path_to_aruco_samples/tutorial_camera_params.yml
-cd=/path_to_aruco_samples/tutorial_dict.yml
@endcode
Parameters for `detect_board.cpp`:
@snippet samples/detect_board.cpp aruco_detect_board_keys
@note To work with the examples from this tutorial, you can use the camera parameters from `tutorial_camera_params.yml` and
you need to use the custom dictionary from `tutorial_dict.yml`.
An example of this usage can be found in `detect_board.cpp`; a minimal sketch follows.
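A minimal sketch of loading the custom dictionary with the `cv::aruco::Dictionary::readDictionary()` function added in this module (file names follow the tutorial):

@code{.cpp}
cv::FileStorage fs("tutorial_dict.yml", cv::FileStorage::READ);
cv::Ptr<cv::aruco::Dictionary> dictionary;
cv::aruco::Dictionary::readDictionary(fs.root(), dictionary);
@endcode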
Refine marker detection
-----

@ -1,6 +1,9 @@
Calibration with ArUco and ChArUco {#tutorial_aruco_calibration}
==============================
@prev_tutorial{tutorial_charuco_diamond_detection}
@next_tutorial{tutorial_aruco_faq}
The ArUco module can also be used to calibrate a camera. Camera calibration consists of obtaining the
camera intrinsic parameters and distortion coefficients. These parameters remain fixed unless the camera
optics are modified, so camera calibration only needs to be done once.
@ -59,14 +62,16 @@ in each of the viewpoints.
Finally, the ```calibrationFlags``` parameter determines some of the options for the calibration. Its format is equivalent to the flags parameter in the OpenCV
```calibrateCamera()``` function.
A full working example is included in the ```calibrate_camera_charuco.cpp``` inside the module samples folder.
A full working example is included in `calibrate_camera_charuco.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
_output path_" -dp="_path_/detector_params.yml" -w=5 -h=7 -sl=0.04 -ml=0.02 -d=10
"output_path/camera_calib.txt" -w=5 -h=7 -sl=0.04 -ml=0.02 -d=10
-v="path_aruco/tutorials/aruco_calibration/images/img_%02d.jpg
-c=path_aruco/samples/tutorial_camera_params.yml
@endcode
The camera calibration parameters in `samples/tutorial_camera_charuco.yml` were obtained using the images `aruco_calibration/images/img_00.jpg` to `img_03.jpg`.
Calibration with ArUco Boards
------
@ -104,7 +109,7 @@ In this case, and contrary to the ```calibrateCameraCharuco()``` function, the d
The rest of the parameters are the same as in ```calibrateCameraCharuco()```, except for the board layout object, which does not
need to be a ```CharucoBoard``` object; it can be any ```Board``` object.
A full working example is included in the ```calibrate_camera.cpp``` inside the module samples folder.
A full working example is included in `calibrate_camera.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}

Binary file not shown. After: 124 KiB
Binary file not shown. After: 115 KiB
Binary file not shown. After: 102 KiB
Binary file not shown. After: 123 KiB

@ -1,6 +1,8 @@
Detection of ArUco Markers {#tutorial_aruco_detection}
==============================
@next_tutorial{tutorial_aruco_board_detection}
Pose estimation is of great importance in many computer vision applications: robot navigation,
augmented reality, and many more. This process is based on finding correspondences between points in
the real environment and their 2d image projection. This is usually a difficult step, and thus it is
@ -102,12 +104,14 @@ The generated image is:
![Generated marker](images/marker23.png)
A full working example is included in the `create_marker.cpp` inside the module samples folder.
A full working example is included in `create_marker.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using cv::CommandLineParser. For this file the example parameters will look like
Note: The samples now take input from the command line using cv::CommandLineParser. For this file the example parameters will look like:
@code{.cpp}
"marker23.png" -d=10 -id=23
@endcode
Parameters for `create_marker.cpp`:
@snippet samples/create_marker.cpp aruco_create_markers_keys
Marker Detection
------
@ -231,13 +235,14 @@ while (inputVideo.grab()) {
Note that some of the optional parameters have been omitted, like the detection parameter object and the
output vector of rejected candidates.
A full working example is included in the `detect_markers.cpp` inside the module samples folder.
A full working example is included in `detect_markers.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using cv::CommandLineParser. For this file the example parameters will look like:
@code{.cpp}
-c="_path_/calib.txt" -d=10
-v=/path_to_aruco_tutorials/aruco_detection/images/singlemarkersoriginal.jpg -d=10
@endcode
Parameters for `detect_markers.cpp`:
@snippet samples/detect_markers.cpp aruco_detect_markers_keys
Pose Estimation
@ -267,7 +272,9 @@ The aruco module provides a function to estimate the poses of all the detected m
@code{.cpp}
cv::Mat cameraMatrix, distCoeffs;
...
// You can read camera parameters from tutorial_camera_params.yml
readCameraParameters(filename, cameraMatrix, distCoeffs); // This function is located in detect_markers.cpp
std::vector<cv::Vec3d> rvecs, tvecs;
cv::aruco::estimatePoseSingleMarkers(markerCorners, 0.05, cameraMatrix, distCoeffs, rvecs, tvecs);
@endcode
@ -308,8 +315,8 @@ cv::VideoCapture inputVideo;
inputVideo.open(0);
cv::Mat cameraMatrix, distCoeffs;
// camera parameters are read from somewhere
readCameraParameters(cameraMatrix, distCoeffs);
// You can read camera parameters from tutorial_camera_params.yml
readCameraParameters(filename, cameraMatrix, distCoeffs); // This function is located in detect_markers.cpp
cv::Ptr<cv::aruco::Dictionary> dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_6X6_250);
@ -346,12 +353,17 @@ Sample video:
<iframe width="420" height="315" src="https://www.youtube.com/embed/IsXWrcB_Hvs" frameborder="0" allowfullscreen></iframe>
@endhtmlonly
A full working example is included in the `detect_markers.cpp` inside the module samples folder.
A full working example is included in `detect_markers.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using cv::CommandLineParser. For this file the example parameters will look like:
@code{.cpp}
-c="_path_/calib.txt" -d=10
-v=/path_to_aruco_tutorials/aruco_detection/images/singlemarkersoriginal.jpg -d=10
-c=/path_to_aruco_samples/tutorial_camera_params.yml
@endcode
Parameters for `detect_markers.cpp`:
@snippet samples/detect_markers.cpp aruco_detect_markers_keys
@note To work with the examples from this tutorial, you can use the camera parameters from `tutorial_camera_params.yml`.
An example of this usage can be found in `detect.cpp`.
@ -766,4 +778,4 @@ too low, it can produce a poor subpixel refinement.
Default values:
- `int cornerRefinementMaxIterations = 30`
- `double cornerRefinementMinAccuracy = 0.1`
- `double cornerRefinementMinAccuracy = 0.1`

@ -1,6 +1,8 @@
Aruco module FAQ {#tutorial_aruco_faq}
==============================
@prev_tutorial{tutorial_aruco_calibration}
This is a compilation of questions that can be useful for those that want to use the aruco module.
- I only want to label some objects, what should I use?

@ -1,6 +1,9 @@
Detection of ChArUco Corners {#tutorial_charuco_detection}
Detection of ChArUco Boards {#tutorial_charuco_detection}
==============================
@prev_tutorial{tutorial_aruco_board_detection}
@next_tutorial{tutorial_charuco_diamond_detection}
ArUco markers and boards are very useful due to their fast detection and their versatility.
However, one of the problems of ArUco markers is that the accuracy of their corner positions is not too high,
even after applying subpixel refinement.
@ -87,11 +90,11 @@ The output image will be something like this:
![](images/charucoboard.jpg)
A full working example is included in the ```create_board_charuco.cpp``` inside the modules/aruco/samples/create_board_charuco.cpp.
A full working example is included in `create_board_charuco.cpp` inside `modules/aruco/samples/`.
Note: `create_board_charuco.cpp` now takes input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
"_ output path_/chboard.png" -w=5 -h=7 -sl=200 -ml=120 -d=10
"_output_path_/chboard.png" -w=5 -h=7 -sl=200 -ml=120 -d=10
@endcode
@ -180,15 +183,15 @@ This can be easily done using the ```drawDetectedCornersCharuco()``` function:
For this image:
![Image with Charuco board](images/choriginal.png)
![Image with Charuco board](images/choriginal.jpg)
The result will be:
![Charuco board detected](images/chcorners.png)
![Charuco board detected](images/chcorners.jpg)
In the presence of occlusion, like in the following image, although some corners are clearly visible, not all of their surrounding markers are detected due to occlusion and, thus, they are not interpolated:
![Charuco detection with occlusion](images/chocclusion.png)
![Charuco detection with occlusion](images/chocclusion.jpg)
Finally, this is a full example of ChArUco detection (without using calibration parameters):
@ -200,15 +203,14 @@ Sample video:
<iframe width="420" height="315" src="https://www.youtube.com/embed/Nj44m_N_9FY" frameborder="0" allowfullscreen></iframe>
@endhtmlonly
A full working example is included in the ```detect_board_charuco.cpp``` inside the modules/aruco/samples/detect_board_charuco.cpp.
A full working example is included in `detect_board_charuco.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
-c="_path_/calib.txt" -dp="_path_/detector_params.yml" -w=5 -h=7 -sl=0.04 -ml=0.02 -d=10
-w=5 -h=7 -sl=0.04 -ml=0.02 -d=10
-v=/path_to_aruco_tutorials/charuco_detection/images/choriginal.jpg
@endcode
Here `calib.txt` is the output file generated by `calibrate_camera_charuco.cpp`.
ChArUco Pose Estimation
------
@ -231,15 +233,17 @@ not enough corners for pose estimation or they are in the same line.
The axis can be drawn using ```drawAxis()``` to check the pose is correctly estimated. The result would be: (X:red, Y:green, Z:blue)
![Charuco Board Axis](images/chaxis.png)
![Charuco Board Axis](images/chaxis.jpg)
A full example of ChArUco detection with pose estimation:
@snippet samples/tutorial_charuco_create_detect.cpp detwcp
A full working example is included in the ```detect_board_charuco.cpp``` inside the modules/aruco/samples/detect_board_charuco.cpp.
A full working example is included in `detect_board_charuco.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
"_path_/calib.txt" -dp="_path_/detector_params.yml" -w=5 -h=7 -sl=0.04 -ml=0.02 -d=10
-w=5 -h=7 -sl=0.04 -ml=0.02 -d=10 -dp="_path_/detector_params.yml"
-v=/path_to_aruco_tutorials/charuco_detection/images/choriginal.jpg
-c=/path_to_aruco_samples/tutorial_camera_charuco.yml
@endcode

Binary file not shown. After: 116 KiB
Binary file not shown. Before: 385 KiB
Binary file not shown. After: 107 KiB
Binary file not shown. Before: 387 KiB
Binary file not shown. After: 97 KiB
Binary file not shown. Before: 404 KiB
Binary file not shown. After: 112 KiB
Binary file not shown. After: 115 KiB
Binary file not shown. Before: 348 KiB

@ -1,6 +1,9 @@
Detection of Diamond Markers {#tutorial_charuco_diamond_detection}
==============================
@prev_tutorial{tutorial_charuco_detection}
@next_tutorial{tutorial_aruco_calibration}
A ChArUco diamond marker (or simply diamond marker) is a chessboard composed of 3x3 squares and 4 ArUco markers inside the white squares.
It is similar in appearance to a ChArUco board; however, they are conceptually different.
@ -58,7 +61,7 @@ The image produced will be:
![Diamond marker](images/diamondmarker.png)
A full working example is included in the ```create_diamond.cpp``` inside the module samples folder.
A full working example is included in `create_diamond.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
@ -118,11 +121,13 @@ The result is the same as the one produced by ```drawDetectedMarkers()```, but
![Detected diamond markers](images/detecteddiamonds.png)
A full working example is included in the ```detect_diamonds.cpp``` inside the module samples folder.
A full working example is included in `detect_diamonds.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
-c="_path_/calib.txt" -dp="_path_/detector_params.yml" -sl=0.04 -ml=0.02 -d=10
-dp="path_aruco/samples/detector_params.yml" -sl=0.04 -ml=0.012 -refine=3
-v="path_aruco/tutorials/charuco_diamond_detection/images/diamondmarkers.png"
-cd="path_aruco/samples/tutorial_dict.yml
@endcode
ChArUco Diamond Pose Estimation
@ -166,9 +171,12 @@ Sample video:
<iframe width="420" height="315" src="https://www.youtube.com/embed/OqKpBnglH7k" frameborder="0" allowfullscreen></iframe>
@endhtmlonly
A full working example is included in the ```detect_diamonds.cpp``` inside the module samples folder.
A full working example is included in `detect_diamonds.cpp` inside `modules/aruco/samples/`.
Note: The samples now take input from the command line using the [OpenCV CommandLineParser](http://docs.opencv.org/trunk/d0/d2e/classcv_1_1CommandLineParser.html#gsc.tab=0). For this file the example parameters will look like:
@code{.cpp}
-c="_output path_/calib.txt" -dp="_path_/detector_params.yml" -sl=0.04 -ml=0.02 -d=10
-dp="path_aruco/samples/detector_params.yml" -sl=0.04 -ml=0.012 -refine=3
-v="path_aruco/tutorials/charuco_diamond_detection/images/diamondmarkers.png"
-cd="path_aruco/samples/tutorial_dict.yml
-c="path_aruco/samples/tutorial_camera_params.yml"
@endcode

@ -255,7 +255,7 @@ BarcodeDetector::detectAndDecode(InputArray img, vector<std::string> &decoded_in
return false;
}
vector<Point2f> points;
bool ok = this->detect(img, points);
bool ok = this->detect(inarr, points);
if (!ok)
{
points_.release();

@ -662,11 +662,11 @@ CV_EXPORTS_W void countNonZero(InputArray src, OutputArray dst, Stream& stream =
@param dim Dimension index along which the matrix is reduced. 0 means that the matrix is reduced
to a single row. 1 means that the matrix is reduced to a single column.
@param reduceOp Reduction operation that could be one of the following:
- **CV_REDUCE_SUM** The output is the sum of all rows/columns of the matrix.
- **CV_REDUCE_AVG** The output is the mean vector of all rows/columns of the matrix.
- **CV_REDUCE_MAX** The output is the maximum (column/row-wise) of all rows/columns of the
- **REDUCE_SUM** The output is the sum of all rows/columns of the matrix.
- **REDUCE_AVG** The output is the mean vector of all rows/columns of the matrix.
- **REDUCE_MAX** The output is the maximum (column/row-wise) of all rows/columns of the
matrix.
- **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the
- **REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the
matrix.
@param dtype When it is negative, the destination vector will have the same type as the source
matrix. Otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels()) .
@ -675,7 +675,7 @@ matrix. Otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channe
The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
1D vectors and performing the specified operation on the vectors until a single row/column is
obtained. For example, the function can be used to compute horizontal and vertical projections of a
raster image. In case of CV_REDUCE_SUM and CV_REDUCE_AVG , the output may have a larger element
raster image. In case of REDUCE_SUM and REDUCE_AVG, the output may have a larger element
bit-depth to preserve accuracy. Multi-channel arrays are also supported in these two reduction
modes.
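For illustration, a small sketch (sizes arbitrary) computing column-wise sums of an 8-bit matrix while widening the output type to avoid overflow:
@code{.cpp}
cv::Mat h = cv::Mat::ones(64, 128, CV_8UC1);
cv::cuda::GpuMat src(h), colSums; // upload to the GPU
// dim = 0 reduces to a single row; the explicit CV_32S dtype prevents
// the 8-bit sums from overflowing
cv::cuda::reduce(src, colSums, 0, cv::REDUCE_SUM, CV_32S);
// colSums is 1x128 CV_32SC1; every entry equals 64 here
@endcode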

@ -231,7 +231,7 @@ CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const Ptr<EncoderCall
want to work with raw video stream.
@param frameSize Size of the input video frames.
@param fps Framerate of the created video stream.
@param params Encoder parameters. See cudacodec::EncoderParams .
@param params Encoder parameters. See cudacodec::EncoderParams.
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
encoding, frames with other formats will be used as is.
@ -265,7 +265,7 @@ enum Codec
Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) //!< UYVY (4:2:2)
};
/** @brief Chroma formats supported by cudacodec::VideoReader .
/** @brief Chroma formats supported by cudacodec::VideoReader.
*/
enum ChromaFormat
{
@ -276,6 +276,18 @@ enum ChromaFormat
NumFormats
};
/** @brief Deinterlacing mode used by decoder.
* @param Weave Weave both fields (no deinterlacing). For progressive content and for content that doesn't need deinterlacing.
* @param Bob Drop one field.
* @param Adaptive Adaptive deinterlacing needs more video memory than other deinterlacing modes.
* */
enum DeinterlaceMode
{
Weave = 0,
Bob = 1,
Adaptive = 2
};
/** @brief Struct providing information about video file format.
*/
struct FormatInfo
@ -283,10 +295,31 @@ struct FormatInfo
Codec codec;
ChromaFormat chromaFormat;
int nBitDepthMinus8 = -1;
int width = 0;//!< Width of the decoded frame returned by nextFrame(frame)
int height = 0;//!< Height of the decoded frame returned by nextFrame(frame)
int ulWidth = 0;//!< Coded sequence width in pixels.
int ulHeight = 0;//!< Coded sequence height in pixels.
int width = 0;//!< Width of the decoded frame returned by nextFrame(frame).
int height = 0;//!< Height of the decoded frame returned by nextFrame(frame).
int ulMaxWidth = 0;
int ulMaxHeight = 0;
Rect displayArea;//!< ROI inside the decoded frame returned by nextFrame(frame), containing the usable video frame.
bool valid = false;
double fps = 0;
int ulNumDecodeSurfaces = 0;//!< Maximum number of internal decode surfaces.
DeinterlaceMode deinterlaceMode;
};
/** @brief cv::cudacodec::VideoReader generic properties identifier.
*/
enum class VideoReaderProps {
PROP_DECODED_FRAME_IDX = 0, //!< Index for retrieving the decoded frame using retrieve().
PROP_EXTRA_DATA_INDEX = 1, //!< Index for retrieving the extra data associated with a video source using retrieve().
PROP_RAW_PACKAGES_BASE_INDEX = 2, //!< Base index for retrieving raw encoded data using retrieve().
PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB = 3, //!< Number of raw packages received since the last call to grab().
PROP_RAW_MODE = 4, //!< Status of raw mode.
PROP_LRF_HAS_KEY_FRAME = 5, //!< FFmpeg source only - Indicates whether the Last Raw Frame (LRF), output from VideoReader::retrieve() when VideoReader is initialized in raw mode, contains encoded data for a key frame.
#ifndef CV_DOXYGEN
PROP_NOT_SUPPORTED
#endif
};
/** @brief Video reader interface.
@ -310,6 +343,62 @@ public:
/** @brief Returns information about video file format.
*/
virtual FormatInfo format() const = 0;
/** @brief Grabs the next frame from the video source.
@return `true` (non-zero) in the case of success.
The method/function grabs the next frame from video file or camera and returns true (non-zero) in
the case of success.
The primary use of the function is for reading both the encoded and decoded video data when rawMode is enabled. With rawMode enabled,
retrieve() can be called following grab() to retrieve all the data associated with the current video source since the last call to grab() or the creation of the VideoReader.
*/
CV_WRAP virtual bool grab(Stream& stream = Stream::Null()) = 0;
/** @brief Returns previously grabbed video data.
@param [out] frame The returned data which depends on the provided idx. If there is no new data since the last call to grab() the image will be empty.
@param idx Determines the returned data inside frame. The returned data can be the:
Decoded frame, idx = get(PROP_DECODED_FRAME_IDX).
Extra data if available, idx = get(PROP_EXTRA_DATA_INDEX).
Raw encoded data package. To retrieve package i, idx = get(PROP_RAW_PACKAGES_BASE_INDEX) + i with i < get(PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB)
@return `false` if no frames have been grabbed
The method returns data associated with the current video source since the last call to grab() or the creation of the VideoReader. If no data is present,
the method returns false and frame is left empty.
*/
CV_WRAP virtual bool retrieve(CV_OUT OutputArray frame, const size_t idx = static_cast<size_t>(VideoReaderProps::PROP_DECODED_FRAME_IDX)) const = 0;
/** @brief Sets a property in the VideoReader.
@param propertyId Property identifier from cv::cudacodec::VideoReaderProps (eg. cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX,
cv::cudacodec::VideoReaderProps::PROP_EXTRA_DATA_INDEX, ...).
@param propertyVal Value of the property.
@return `true` if the property has been set.
*/
CV_WRAP virtual bool set(const VideoReaderProps propertyId, const double propertyVal) = 0;
/** @brief Returns the specified VideoReader property
@param propertyId Property identifier from cv::cudacodec::VideoReaderProps (eg. cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX,
cv::cudacodec::VideoReaderProps::PROP_EXTRA_DATA_INDEX, ...).
@param propertyVal
In - Optional value required for querying specific propertyId's, e.g. the index of the raw package to be checked for a key frame (cv::cudacodec::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME).
Out - Value of the property.
@return `true` unless the property is not supported.
*/
CV_WRAP virtual bool get(const VideoReaderProps propertyId, CV_IN_OUT double& propertyVal) const = 0;
/** @brief Retrieves the specified property used by the VideoSource.
@param propertyId Property identifier from cv::VideoCaptureProperties (eg. cv::CAP_PROP_POS_MSEC, cv::CAP_PROP_POS_FRAMES, ...)
or one from @ref videoio_flags_others.
@param propertyVal Value for the specified property.
@return `true` unless the property is unset or not supported.
*/
CV_WRAP virtual bool get(const int propertyId, CV_OUT double& propertyVal) const = 0;
};
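Putting grab(), retrieve() and the property indices together, a hedged usage sketch (the file name is a placeholder): decode frames and, with rawMode enabled, drain the raw packets received since the last grab():
@code{.cpp}
cv::Ptr<cv::cudacodec::VideoReader> reader =
    cv::cudacodec::createVideoReader("in.mp4", {}, /*rawMode=*/true);
double frameIdx = 0., baseIdx = 0., nPackets = 0.;
reader->get(cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX, frameIdx);
reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX, baseIdx);
cv::cuda::GpuMat frame;
cv::Mat rawPacket;
while (reader->grab()) {
    reader->retrieve(frame, static_cast<size_t>(frameIdx)); // decoded frame
    reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB, nPackets);
    for (size_t i = 0; i < static_cast<size_t>(nPackets); i++) // raw packets
        reader->retrieve(rawPacket, static_cast<size_t>(baseIdx) + i);
}
@endcode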
/** @brief Interface for video demultiplexing.
@ -328,26 +417,53 @@ public:
*/
virtual bool getNextPacket(unsigned char** data, size_t* size) = 0;
/** @brief Returns true if the last packet contained a key frame.
*/
virtual bool lastPacketContainsKeyFrame() const { return false; }
/** @brief Returns information about video file format.
*/
virtual FormatInfo format() const = 0;
/** @brief Updates the coded width and height inside format.
*/
virtual void updateFormat(const int codedWidth, const int codedHeight) = 0;
virtual void updateFormat(const FormatInfo& videoFormat) = 0;
/** @brief Returns any extra data associated with the video source.
@param extraData 1D cv::Mat containing the extra data if it exists.
*/
virtual void getExtraData(cv::Mat& extraData) const = 0;
/** @brief Retrieves the specified property used by the VideoSource.
@param propertyId Property identifier from cv::VideoCaptureProperties (eg. cv::CAP_PROP_POS_MSEC, cv::CAP_PROP_POS_FRAMES, ...)
or one from @ref videoio_flags_others.
@param propertyVal Value for the specified property.
@return `true` unless the property is unset or not supported.
*/
virtual bool get(const int propertyId, double& propertyVal) const = 0;
};
/** @brief Creates video reader.
@param filename Name of the input video file.
@param params Pass-through parameters for VideoCapture. VideoCapture with the FFmpeg back end (CAP_FFMPEG) is used to parse the video input.
The `params` parameter allows specifying extra parameters encoded as pairs `(paramId_1, paramValue_1, paramId_2, paramValue_2, ...)`.
See cv::VideoCaptureProperties;
e.g. when streaming from an RTSP source, CAP_PROP_OPEN_TIMEOUT_MSEC may need to be set.
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
FFmpeg is used to read videos. Users can implement their own demultiplexing with cudacodec::RawVideoSource.
*/
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename);
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename, const std::vector<int>& params = {}, const bool rawMode = false);
/** @overload
@param source RAW video source implemented by user.
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
*/
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source);
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode = false);
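For example (a sketch; the RTSP URL is a placeholder), the flat `params` list holds (id, value) pairs that are forwarded to the FFmpeg-backed VideoCapture:
@code{.cpp}
// wait at most 10 seconds while opening the network stream
std::vector<int> params = { cv::CAP_PROP_OPEN_TIMEOUT_MSEC, 10000 };
cv::Ptr<cv::cudacodec::VideoReader> reader =
    cv::cudacodec::createVideoReader("rtsp://127.0.0.1:8554/stream", params);
@endcode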
//! @}

@ -76,6 +76,8 @@ cv::cudacodec::detail::CuvidVideoSource::CuvidVideoSource(const String& fname)
format_.height = vidfmt.coded_height;
format_.displayArea = Rect(Point(vidfmt.display_area.left, vidfmt.display_area.top), Point(vidfmt.display_area.right, vidfmt.display_area.bottom));
format_.valid = true;
if (vidfmt.frame_rate.numerator != 0 && vidfmt.frame_rate.denominator != 0)
format_.fps = vidfmt.frame_rate.numerator / (double)vidfmt.frame_rate.denominator;
}
cv::cudacodec::detail::CuvidVideoSource::~CuvidVideoSource()
@ -88,10 +90,9 @@ FormatInfo cv::cudacodec::detail::CuvidVideoSource::format() const
return format_;
}
void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const int codedWidth, const int codedHeight)
void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const FormatInfo& videoFormat)
{
format_.width = codedWidth;
format_.height = codedHeight;
format_ = videoFormat;
format_.valid = true;
}
@ -119,7 +120,7 @@ int CUDAAPI cv::cudacodec::detail::CuvidVideoSource::HandleVideoData(void* userD
{
CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
return thiz->parseVideoData(packet->payload, packet->payload_size, thiz->RawModeEnabled(), false, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
}
#endif // HAVE_NVCUVID

@ -55,7 +55,7 @@ public:
~CuvidVideoSource();
FormatInfo format() const CV_OVERRIDE;
void updateFormat(const int codedWidth, const int codedHeight);
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
void start() CV_OVERRIDE;
void stop() CV_OVERRIDE;
bool isStarted() const CV_OVERRIDE;

@ -75,6 +75,7 @@ Codec FourccToCodec(int codec)
case CV_FOURCC_MACRO('M', 'P', 'G', '1'): return MPEG1;
case CV_FOURCC_MACRO('M', 'P', 'G', '2'): return MPEG2;
case CV_FOURCC_MACRO('X', 'V', 'I', 'D'): // fallthru
case CV_FOURCC_MACRO('m', 'p', '4', 'v'): // fallthru
case CV_FOURCC_MACRO('D', 'I', 'V', 'X'): return MPEG4;
case CV_FOURCC_MACRO('W', 'V', 'C', '1'): return VC1;
case CV_FOURCC_MACRO('H', '2', '6', '4'): // fallthru
@ -112,12 +113,30 @@ void FourccToChromaFormat(const int pixelFormat, ChromaFormat &chromaFormat, int
}
}
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
static
int StartCodeLen(unsigned char* data, const int sz) {
if (sz >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
return 3;
else if (sz >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1)
return 4;
else
return 0;
}
static bool ParamSetsExist(unsigned char* parameterSets, const int szParameterSets, unsigned char* data, const int szData) {
const int paramSetStartCodeLen = StartCodeLen(parameterSets, szParameterSets);
const int packetStartCodeLen = StartCodeLen(data, szData);
// weak test to see if the parameter set has already been included in the RTP stream
return paramSetStartCodeLen != 0 && packetStartCodeLen != 0 && parameterSets[paramSetStartCodeLen] == data[packetStartCodeLen];
}
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname, const std::vector<int>& _videoCaptureParams)
: videoCaptureParams(_videoCaptureParams)
{
if (!videoio_registry::hasBackend(CAP_FFMPEG))
CV_Error(Error::StsNotImplemented, "FFmpeg backend not found");
cap.open(fname, CAP_FFMPEG);
cap.open(fname, CAP_FFMPEG, videoCaptureParams);
if (!cap.isOpened())
CV_Error(Error::StsUnsupportedFormat, "Unsupported video source");
@ -125,6 +144,11 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
CV_Error(Error::StsUnsupportedFormat, "Fetching of RAW video streams is not supported");
CV_Assert(cap.get(CAP_PROP_FORMAT) == -1);
const int codecExtradataIndex = static_cast<int>(cap.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
Mat tmpExtraData;
if (cap.retrieve(tmpExtraData, codecExtradataIndex) && tmpExtraData.total())
extraData = tmpExtraData.clone();
int codec = (int)cap.get(CAP_PROP_FOURCC);
int pixelFormat = (int)cap.get(CAP_PROP_CODEC_PIXEL_FORMAT);
@ -133,6 +157,7 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
format_.width = cap.get(CAP_PROP_FRAME_WIDTH);
format_.displayArea = Rect(0, 0, format_.width, format_.height);
format_.valid = false;
format_.fps = cap.get(CAP_PROP_FPS);
FourccToChromaFormat(pixelFormat, format_.chromaFormat, format_.nBitDepthMinus8);
}
@ -147,19 +172,49 @@ FormatInfo cv::cudacodec::detail::FFmpegVideoSource::format() const
return format_;
}
void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const int codedWidth, const int codedHeight)
void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const FormatInfo& videoFormat)
{
format_.width = codedWidth;
format_.height = codedHeight;
format_ = videoFormat;
format_.valid = true;
}
bool cv::cudacodec::detail::FFmpegVideoSource::get(const int propertyId, double& propertyVal) const
{
CV_Assert(videoCaptureParams.size() % 2 == 0);
for (std::size_t i = 0; i < videoCaptureParams.size(); i += 2) {
if (videoCaptureParams.at(i) == propertyId) {
propertyVal = videoCaptureParams.at(i + 1);
return true;
}
}
return false;
}
bool cv::cudacodec::detail::FFmpegVideoSource::getNextPacket(unsigned char** data, size_t* size)
{
cap >> rawFrame;
*data = rawFrame.data;
*size = rawFrame.total();
if (iFrame++ == 0 && extraData.total()) {
if (format_.codec == Codec::MPEG4 ||
((format_.codec == Codec::H264 || format_.codec == Codec::HEVC) && !ParamSetsExist(extraData.data, extraData.total(), *data, *size)))
{
const size_t nBytesToTrimFromData = format_.codec == Codec::MPEG4 ? 3 : 0;
const size_t newSz = extraData.total() + *size - nBytesToTrimFromData;
dataWithHeader = Mat(1, newSz, CV_8UC1);
memcpy(dataWithHeader.data, extraData.data, extraData.total());
memcpy(dataWithHeader.data + extraData.total(), (*data) + nBytesToTrimFromData, *size - nBytesToTrimFromData);
*data = dataWithHeader.data;
*size = newSz;
}
}
return *size != 0;
}
bool cv::cudacodec::detail::FFmpegVideoSource::lastPacketContainsKeyFrame() const
{
return cap.get(CAP_PROP_LRF_HAS_KEY_FRAME);
}
#endif // HAVE_CUDA

@ -51,20 +51,27 @@ namespace cv { namespace cudacodec { namespace detail {
class FFmpegVideoSource : public RawVideoSource
{
public:
FFmpegVideoSource(const String& fname);
FFmpegVideoSource(const String& fname, const std::vector<int>& params);
~FFmpegVideoSource();
bool getNextPacket(unsigned char** data, size_t* size) CV_OVERRIDE;
bool lastPacketContainsKeyFrame() const;
FormatInfo format() const CV_OVERRIDE;
void updateFormat(const int codedWidth, const int codedHeight);
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
void getExtraData(cv::Mat& _extraData) const CV_OVERRIDE { _extraData = extraData; }
bool get(const int propertyId, double& propertyVal) const;
private:
FormatInfo format_;
VideoCapture cap;
Mat rawFrame;
Mat rawFrame, extraData, dataWithHeader;
int iFrame = 0;
std::vector<int> videoCaptureParams;
};
}}}

@ -45,13 +45,22 @@
#ifdef HAVE_NVCUVID
cv::cudacodec::detail::FrameQueue::FrameQueue() :
endOfDecode_(0),
framesInQueue_(0),
readPosition_(0)
{
std::memset(displayQueue_, 0, sizeof(displayQueue_));
std::memset((void*) isFrameInUse_, 0, sizeof(isFrameInUse_));
RawPacket::RawPacket(const unsigned char* _data, const size_t _size, const bool _containsKeyFrame) : size(_size), containsKeyFrame(_containsKeyFrame) {
// deep-copy the payload into a shared buffer with an array deleter, so the
// data outlives the source packet and is released exactly once
data = cv::Ptr<unsigned char>(new unsigned char[size], [](unsigned char* p) { delete[] p; });
memcpy(data.get(), _data, size);
}
cv::cudacodec::detail::FrameQueue::~FrameQueue() {
if (isFrameInUse_)
delete[] isFrameInUse_;
}
void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) {
AutoLock autoLock(mtx_);
maxSz = _maxSz;
displayQueue_ = std::vector<CUVIDPARSERDISPINFO>(maxSz, CUVIDPARSERDISPINFO());
isFrameInUse_ = new volatile int[maxSz];
std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz);
}
bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
@ -68,7 +77,7 @@ bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex
return true;
}
void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket>& rawPackets)
{
// Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed
// for display
@ -82,10 +91,12 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
{
AutoLock autoLock(mtx_);
if (framesInQueue_ < MaximumSize)
if (framesInQueue_ < maxSz)
{
int writePosition = (readPosition_ + framesInQueue_) % MaximumSize;
displayQueue_[writePosition] = *picParams;
const int writePosition = (readPosition_ + framesInQueue_) % maxSz;
displayQueue_.at(writePosition) = *picParams;
for (const auto& rawPacket : rawPackets)
rawPacketQueue.push(rawPacket);
framesInQueue_++;
isFramePlaced = true;
}
@ -99,15 +110,19 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
} while (!isEndOfDecode());
}
bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets)
{
AutoLock autoLock(mtx_);
if (framesInQueue_ > 0)
{
int entry = readPosition_;
displayInfo = displayQueue_[entry];
readPosition_ = (entry + 1) % MaximumSize;
displayInfo = displayQueue_.at(entry);
while (!rawPacketQueue.empty()) {
rawPackets.push_back(rawPacketQueue.front());
rawPacketQueue.pop();
}
readPosition_ = (entry + 1) % maxSz;
framesInQueue_--;
return true;
}

@ -43,17 +43,27 @@
#ifndef __FRAME_QUEUE_HPP__
#define __FRAME_QUEUE_HPP__
#include <queue>
#include "opencv2/core/utility.hpp"
class RawPacket {
public:
RawPacket(const unsigned char* _data, const size_t _size = 0, const bool _containsKeyFrame = false);
unsigned char* Data() const { return data.get(); }
size_t size;
bool containsKeyFrame;
private:
cv::Ptr<unsigned char> data; // shared, owned copy of the packet payload (array deleter set in the constructor)
};
namespace cv { namespace cudacodec { namespace detail {
class FrameQueue
{
public:
static const int MaximumSize = 20; // MAX_FRM_CNT;
FrameQueue();
~FrameQueue();
void init(const int _maxSz);
void endDecode() { endOfDecode_ = true; }
bool isEndOfDecode() const { return endOfDecode_ != 0;}
@ -64,7 +74,7 @@ public:
// available, the method returns false.
bool waitUntilFrameAvailable(int pictureIndex);
void enqueue(const CUVIDPARSERDISPINFO* picParams);
void enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket>& rawPackets);
// Deque the next frame.
// Parameters:
@ -72,7 +82,7 @@ public:
// Returns:
// true, if a new frame was returned,
// false, if the queue was empty and no new frame could be returned.
bool dequeue(CUVIDPARSERDISPINFO& displayInfo);
bool dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets);
void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; }
@ -80,13 +90,13 @@ private:
bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; }
Mutex mtx_;
volatile int isFrameInUse_[MaximumSize];
volatile int endOfDecode_;
int framesInQueue_;
int readPosition_;
CUVIDPARSERDISPINFO displayQueue_[MaximumSize];
volatile int* isFrameInUse_ = 0;
volatile int endOfDecode_ = 0;
int framesInQueue_ = 0;
int readPosition_ = 0;
std::vector< CUVIDPARSERDISPINFO> displayQueue_;
int maxSz = 0;
std::queue<RawPacket> rawPacketQueue;
};
}}}

@ -47,13 +47,13 @@
void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
{
if (videoFormat.nBitDepthMinus8 > 0 || videoFormat.chromaFormat != YUV420)
CV_Error(Error::StsUnsupportedFormat, "NV12 output requires 8 bit YUV420");
if (videoFormat.nBitDepthMinus8 > 0 || videoFormat.chromaFormat == YUV444)
CV_Error(Error::StsUnsupportedFormat, "NV12 output currently supported for 8 bit YUV420, YUV422 and Monochrome inputs.");
cudaVideoCodec _codec = static_cast<cudaVideoCodec>(videoFormat.codec);
cudaVideoChromaFormat _chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
cudaVideoCreateFlags videoCreateFlags = (_codec == cudaVideoCodec_JPEG || _codec == cudaVideoCodec_MPEG2) ?
videoFormat_ = videoFormat;
const cudaVideoCodec _codec = static_cast<cudaVideoCodec>(videoFormat.codec);
const cudaVideoChromaFormat _chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
const cudaVideoCreateFlags videoCreateFlags = (_codec == cudaVideoCodec_JPEG || _codec == cudaVideoCodec_MPEG2) ?
cudaVideoCreate_PreferCUDA :
cudaVideoCreate_PreferCUVID;
@ -101,35 +101,29 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
if (!decodeCaps.bIsSupported)
CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder");
CV_Assert(videoFormat.width >= decodeCaps.nMinWidth &&
videoFormat.height >= decodeCaps.nMinHeight &&
videoFormat.width <= decodeCaps.nMaxWidth &&
videoFormat.height <= decodeCaps.nMaxHeight);
CV_Assert(videoFormat.ulWidth >= decodeCaps.nMinWidth &&
videoFormat.ulHeight >= decodeCaps.nMinHeight &&
videoFormat.ulWidth <= decodeCaps.nMaxWidth &&
videoFormat.ulHeight <= decodeCaps.nMaxHeight);
CV_Assert((videoFormat.width >> 4)* (videoFormat.height >> 4) <= decodeCaps.nMaxMBCount);
#endif
// Fill the decoder-create-info struct from the given video-format struct.
std::memset(&createInfo_, 0, sizeof(CUVIDDECODECREATEINFO));
// Create video decoder
CUVIDDECODECREATEINFO createInfo_ = {};
createInfo_.CodecType = _codec;
createInfo_.ulWidth = videoFormat.width;
createInfo_.ulHeight = videoFormat.height;
createInfo_.ulNumDecodeSurfaces = FrameQueue::MaximumSize;
createInfo_.ulWidth = videoFormat.ulWidth;
createInfo_.ulHeight = videoFormat.ulHeight;
createInfo_.ulNumDecodeSurfaces = videoFormat.ulNumDecodeSurfaces;
createInfo_.ChromaFormat = _chromaFormat;
createInfo_.OutputFormat = cudaVideoSurfaceFormat_NV12;
createInfo_.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
// No scaling
static const int MAX_FRAME_COUNT = 2;
createInfo_.ulTargetWidth = createInfo_.ulWidth;
createInfo_.ulTargetHeight = createInfo_.ulHeight;
createInfo_.ulNumOutputSurfaces = MAX_FRAME_COUNT; // We won't simultaneously map more than 8 surfaces
createInfo_.DeinterlaceMode = static_cast<cudaVideoDeinterlaceMode>(videoFormat.deinterlaceMode);
createInfo_.ulTargetWidth = videoFormat.width;
createInfo_.ulTargetHeight = videoFormat.height;
createInfo_.ulMaxWidth = videoFormat.ulMaxWidth;
createInfo_.ulMaxHeight = videoFormat.ulMaxHeight;
createInfo_.ulNumOutputSurfaces = 2;
createInfo_.ulCreationFlags = videoCreateFlags;
createInfo_.vidLock = lock_;
cuSafeCall(cuCtxPushCurrent(ctx_));
cuSafeCall(cuvidCreateDecoder(&decoder_, &createInfo_));
cuSafeCall(cuCtxPopCurrent(NULL));

@ -49,9 +49,9 @@ namespace cv { namespace cudacodec { namespace detail {
class VideoDecoder
{
public:
VideoDecoder(const FormatInfo& videoFormat, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
VideoDecoder(const Codec& codec, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
{
create(videoFormat);
videoFormat_.codec = codec;
}
~VideoDecoder()
@ -63,17 +63,18 @@ public:
void release();
// Get the code-type currently used.
cudaVideoCodec codec() const { return createInfo_.CodecType; }
unsigned long maxDecodeSurfaces() const { return createInfo_.ulNumDecodeSurfaces; }
cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
unsigned long maxDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
unsigned long frameWidth() const { return createInfo_.ulWidth; }
unsigned long frameHeight() const { return createInfo_.ulHeight; }
unsigned long frameWidth() const { return videoFormat_.ulWidth; }
unsigned long frameHeight() const { return videoFormat_.ulHeight; }
FormatInfo format() { AutoLock autoLock(mtx_); return videoFormat_;}
unsigned long targetWidth() const { return createInfo_.ulTargetWidth; }
unsigned long targetHeight() const { return createInfo_.ulTargetHeight; }
unsigned long targetWidth() { return videoFormat_.width; }
unsigned long targetHeight() { return videoFormat_.height; }
cudaVideoChromaFormat chromaFormat() const { return createInfo_.ChromaFormat; }
int nBitDepthMinus8() const { return createInfo_.bitDepthMinus8; }
cudaVideoChromaFormat chromaFormat() const { return static_cast<cudaVideoChromaFormat>(videoFormat_.chromaFormat); }
int nBitDepthMinus8() const { return videoFormat_.nBitDepthMinus8; }
bool decodePicture(CUVIDPICPARAMS* picParams)
{
@ -87,8 +88,7 @@ public:
cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch);
}
void unmapFrame(cuda::GpuMat& frame)
@ -98,10 +98,11 @@ public:
}
private:
CUcontext ctx_ = 0;
CUvideoctxlock lock_;
CUcontext ctx_;
CUVIDDECODECREATEINFO createInfo_;
CUvideodecoder decoder_;
CUvideodecoder decoder_ = 0;
FormatInfo videoFormat_ = {};
Mutex mtx_;
};
}}}

@ -52,7 +52,7 @@ cv::cudacodec::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, Fram
std::memset(&params, 0, sizeof(CUVIDPARSERPARAMS));
params.CodecType = videoDecoder->codec();
params.ulMaxNumDecodeSurfaces = videoDecoder->maxDecodeSurfaces();
params.ulMaxNumDecodeSurfaces = 1;
params.ulMaxDisplayDelay = 1; // this flag is needed so the parser will push frames out to the decoder as quickly as it can
params.pUserData = this;
params.pfnSequenceCallback = HandleVideoSequence; // Called before decoding frames and/or whenever there is a format change
@ -62,7 +62,7 @@ cv::cudacodec::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, Fram
cuSafeCall( cuvidCreateVideoParser(&parser_, &params) );
}
bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* data, size_t size, bool endOfStream)
bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* data, size_t size, const bool rawMode, const bool containsKeyFrame, bool endOfStream)
{
CUVIDSOURCEDATAPACKET packet;
std::memset(&packet, 0, sizeof(CUVIDSOURCEDATAPACKET));
@ -73,6 +73,9 @@ bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* dat
packet.payload_size = static_cast<unsigned long>(size);
packet.payload = data;
if (rawMode)
currentFramePackets.push_back(RawPacket(data, size, containsKeyFrame));
if (cuvidParseVideoData(parser_, &packet) != CUDA_SUCCESS)
{
hasError_ = true;
@ -80,7 +83,7 @@ bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* dat
return false;
}
const int maxUnparsedPackets = 20;
constexpr int maxUnparsedPackets = 20;
++unparsedPackets_;
if (unparsedPackets_ > maxUnparsedPackets)
@ -106,17 +109,39 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
format->coded_width != thiz->videoDecoder_->frameWidth() ||
format->coded_height != thiz->videoDecoder_->frameHeight() ||
format->chroma_format != thiz->videoDecoder_->chromaFormat()||
format->bit_depth_luma_minus8 != thiz->videoDecoder_->nBitDepthMinus8())
format->bit_depth_luma_minus8 != thiz->videoDecoder_->nBitDepthMinus8() ||
format->min_num_decode_surfaces != thiz->videoDecoder_->maxDecodeSurfaces())
{
FormatInfo newFormat;
newFormat.codec = static_cast<Codec>(format->codec);
newFormat.chromaFormat = static_cast<ChromaFormat>(format->chroma_format);
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
newFormat.ulWidth = format->coded_width;
newFormat.ulHeight = format->coded_height;
newFormat.width = format->coded_width;
newFormat.height = format->coded_height;
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
newFormat.ulNumDecodeSurfaces = format->min_num_decode_surfaces;
if (format->progressive_sequence)
newFormat.deinterlaceMode = Weave;
else
newFormat.deinterlaceMode = Adaptive;
int maxW = 0, maxH = 0;
// AV1 has max width/height of sequence in sequence header
if (format->codec == cudaVideoCodec_AV1 && format->seqhdr_data_length > 0)
{
CUVIDEOFORMATEX* vidFormatEx = (CUVIDEOFORMATEX*)format;
maxW = vidFormatEx->av1.max_width;
maxH = vidFormatEx->av1.max_height;
}
if (maxW < (int)format->coded_width)
maxW = format->coded_width;
if (maxH < (int)format->coded_height)
maxH = format->coded_height;
newFormat.ulMaxWidth = maxW;
newFormat.ulMaxHeight = maxH;
thiz->frameQueue_->init(newFormat.ulNumDecodeSurfaces);
try
{
thiz->videoDecoder_->release();
@ -129,7 +154,7 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
}
}
return true;
return thiz->videoDecoder_->maxDecodeSurfaces();
}
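Returning maxDecodeSurfaces() instead of a plain true exploits the NVDEC parser contract: a sequence-callback return value greater than 1 overrides the ulMaxNumDecodeSurfaces the parser was created with, which is why the parser can now start with ulMaxNumDecodeSurfaces = 1 and grow to the stream's min_num_decode_surfaces later. Paraphrasing the Video Codec SDK documentation:

    // Sequence-callback return value (paraphrased contract):
    //   0  -> failure, parsing stops
    //   1  -> success, keep the current number of decode surfaces
    //   >1 -> success, and reconfigure ulMaxNumDecodeSurfaces to this value
    return thiz->videoDecoder_->maxDecodeSurfaces(); // typically min_num_decode_surfaces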
int CUDAAPI cv::cudacodec::detail::VideoParser::HandlePictureDecode(void* userData, CUVIDPICPARAMS* picParams)
@ -158,8 +183,8 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandlePictureDisplay(void* userD
thiz->unparsedPackets_ = 0;
thiz->frameQueue_->enqueue(picParams);
thiz->frameQueue_->enqueue(picParams, thiz->currentFramePackets);
thiz->currentFramePackets.clear();
return true;
}
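In raw mode the parser accumulates every packet passed to parseVideoData in currentFramePackets; HandlePictureDisplay then enqueues those packets together with the frame they produced and clears the buffer. A simplified sketch of that pairing, with hypothetical helper names:

    // Sketch: pairing raw input packets with the decoded frame they produced.
    std::vector<RawPacket> pending; // filled while the parser consumes packets
    void onPacket(const unsigned char* data, size_t size, bool containsKeyFrame)
    {
        pending.push_back(RawPacket(data, size, containsKeyFrame));
    }
    void onDisplay(CUVIDPARSERDISPINFO* dispInfo)
    {
        frameQueue->enqueue(dispInfo, pending); // frame + its source packets travel together
        pending.clear();                        // start collecting for the next frame
    }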

@ -59,7 +59,7 @@ public:
cuvidDestroyVideoParser(parser_);
}
bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream);
bool parseVideoData(const unsigned char* data, size_t size, const bool rawMode, const bool containsKeyFrame, bool endOfStream);
bool hasError() const { return hasError_; }
@ -68,6 +68,7 @@ private:
FrameQueue* frameQueue_;
CUvideoparser parser_;
int unparsedPackets_;
std::vector<RawPacket> currentFramePackets;
volatile bool hasError_;
// Called when the decoder encounters a video format change (or initial sequence header)

@ -48,8 +48,8 @@ using namespace cv::cudacodec;
#ifndef HAVE_NVCUVID
Ptr<VideoReader> cv::cudacodec::createVideoReader(const String&) { throw_no_cuda(); return Ptr<VideoReader>(); }
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&) { throw_no_cuda(); return Ptr<VideoReader>(); }
Ptr<VideoReader> cv::cudacodec::createVideoReader(const String&, const std::vector<int>&, const bool) { throw_no_cuda(); return Ptr<VideoReader>(); }
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, const bool) { throw_no_cuda(); return Ptr<VideoReader>(); }
#else // HAVE_NVCUVID
@ -69,16 +69,33 @@ namespace
FormatInfo format() const CV_OVERRIDE;
bool grab(Stream& stream) CV_OVERRIDE;
bool retrieve(OutputArray frame, const size_t idx) const CV_OVERRIDE;
bool set(const VideoReaderProps propertyId, const double propertyVal) CV_OVERRIDE;
bool get(const VideoReaderProps propertyId, double& propertyVal) const CV_OVERRIDE;
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;
private:
bool internalGrab(GpuMat& frame, Stream& stream);
Ptr<VideoSource> videoSource_;
Ptr<FrameQueue> frameQueue_;
Ptr<VideoDecoder> videoDecoder_;
Ptr<VideoParser> videoParser_;
Ptr<FrameQueue> frameQueue_ = 0;
Ptr<VideoDecoder> videoDecoder_ = 0;
Ptr<VideoParser> videoParser_ = 0;
CUvideoctxlock lock_;
std::deque< std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> > frames_;
std::vector<RawPacket> rawPackets;
GpuMat lastFrame;
static const int decodedFrameIdx = 0;
static const int extraDataIdx = 1;
static const int rawPacketsBaseIdx = 2;
};
FormatInfo VideoReaderImpl::format() const
@ -97,11 +114,9 @@ namespace
CUcontext ctx;
cuSafeCall( cuCtxGetCurrent(&ctx) );
cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
frameQueue_.reset(new FrameQueue);
videoDecoder_.reset(new VideoDecoder(videoSource_->format(), ctx, lock_));
frameQueue_.reset(new FrameQueue());
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, ctx, lock_));
videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_));
videoSource_->setVideoParser(videoParser_);
videoSource_->start();
}
@ -122,22 +137,21 @@ namespace
CUvideoctxlock m_lock;
};
bool VideoReaderImpl::nextFrame(GpuMat& frame, Stream& stream)
{
if (videoSource_->hasError() || videoParser_->hasError())
CV_Error(Error::StsUnsupportedFormat, "Unsupported video source");
bool VideoReaderImpl::internalGrab(GpuMat& frame, Stream& stream) {
if (videoParser_->hasError())
CV_Error(Error::StsError, "Parsing/Decoding video source failed, check GPU memory is available and GPU supports hardware decoding.");
if (frames_.empty())
{
CUVIDPARSERDISPINFO displayInfo;
rawPackets.clear();
for (;;)
{
if (frameQueue_->dequeue(displayInfo))
if (frameQueue_->dequeue(displayInfo, rawPackets))
break;
if (videoSource_->hasError() || videoParser_->hasError())
CV_Error(Error::StsUnsupportedFormat, "Unsupported video source");
if (videoParser_->hasError())
CV_Error(Error::StsError, "Parsing/Decoding video source failed, check GPU memory is available and GPU supports hardware decoding.");
if (frameQueue_->isEndOfDecode())
return false;
@ -148,7 +162,7 @@ namespace
bool isProgressive = displayInfo.progressive_frame != 0;
const int num_fields = isProgressive ? 1 : 2 + displayInfo.repeat_first_field;
videoSource_->updateFormat(videoDecoder_->targetWidth(), videoDecoder_->targetHeight());
videoSource_->updateFormat(videoDecoder_->format());
for (int active_field = 0; active_field < num_fields; ++active_field)
{
@ -192,31 +206,115 @@ namespace
return true;
}
bool VideoReaderImpl::grab(Stream& stream) {
return internalGrab(lastFrame, stream);
}
bool VideoReaderImpl::retrieve(OutputArray frame, const size_t idx) const {
if (idx == decodedFrameIdx) {
if (!frame.isGpuMat())
CV_Error(Error::StsUnsupportedFormat, "Decoded frame is stored on the device and must be retrieved using a cv::cuda::GpuMat");
frame.getGpuMatRef() = lastFrame;
}
else if (idx == extraDataIdx) {
if (!frame.isMat())
CV_Error(Error::StsUnsupportedFormat, "Extra data is stored on the host and must be retrieved using a cv::Mat");
videoSource_->getExtraData(frame.getMatRef());
}
else {
if (idx >= rawPacketsBaseIdx && idx < rawPacketsBaseIdx + rawPackets.size()) {
if (!frame.isMat())
CV_Error(Error::StsUnsupportedFormat, "Raw data is stored on the host and must be retrieved using a cv::Mat");
Mat tmp(1, rawPackets.at(idx - rawPacketsBaseIdx).size, CV_8UC1, rawPackets.at(idx - rawPacketsBaseIdx).Data(), rawPackets.at(idx - rawPacketsBaseIdx).size);
frame.getMatRef() = tmp;
}
}
return !frame.empty();
}
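Decoded frames stay in device memory while extra data and raw packets are returned on the host, so the OutputArray type must match the index being retrieved. A usage sketch (in real code the indices should be queried through the PROP_* properties rather than hard-coded as below):

    // Usage sketch; error handling omitted, indices assumed at their defaults.
    cv::Ptr<cv::cudacodec::VideoReader> reader =
        cv::cudacodec::createVideoReader("in.mp4", {}, /*rawMode=*/true);
    if (reader->grab())
    {
        cv::cuda::GpuMat frame;
        reader->retrieve(frame);        // idx 0: decoded frame, device memory
        cv::Mat extraData;
        reader->retrieve(extraData, 1); // idx 1: container extra data, host memory
        cv::Mat packet;
        reader->retrieve(packet, 2);    // idx >= 2: raw packets since the last grab
    }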
bool VideoReaderImpl::set(const VideoReaderProps propertyId, const double propertyVal) {
switch (propertyId) {
case VideoReaderProps::PROP_RAW_MODE :
videoSource_->SetRawMode(static_cast<bool>(propertyVal));
break;
}
return true;
}
bool VideoReaderImpl::get(const VideoReaderProps propertyId, double& propertyVal) const {
switch (propertyId)
{
case VideoReaderProps::PROP_DECODED_FRAME_IDX:
propertyVal = decodedFrameIdx;
return true;
case VideoReaderProps::PROP_EXTRA_DATA_INDEX:
propertyVal = extraDataIdx;
return true;
case VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX:
if (videoSource_->RawModeEnabled()) {
propertyVal = rawPacketsBaseIdx;
return true;
}
else
break;
case VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB:
propertyVal = rawPackets.size();
return true;
case VideoReaderProps::PROP_RAW_MODE:
propertyVal = videoSource_->RawModeEnabled();
return true;
case VideoReaderProps::PROP_LRF_HAS_KEY_FRAME: {
const int iPacket = propertyVal - rawPacketsBaseIdx;
if (videoSource_->RawModeEnabled() && iPacket >= 0 && iPacket < rawPackets.size()) {
propertyVal = rawPackets.at(iPacket).containsKeyFrame;
return true;
}
else
break;
}
default:
break;
}
return false;
}
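Note that get() signals "unsupported or currently unavailable" by returning false rather than throwing, so callers can probe for raw-mode support before relying on it:

    // Probing sketch: a false return means the property is unavailable, not an error.
    // Assumes a cv::Ptr<cv::cudacodec::VideoReader> reader as above.
    double rawIdxBase = -1;
    if (!reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX, rawIdxBase))
    {
        reader->set(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, true); // opt in first
        reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX, rawIdxBase);
    }
    // raw packets now live at indices [rawIdxBase, rawIdxBase + N) after each grab()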
bool VideoReaderImpl::get(const int propertyId, double& propertyVal) const {
return videoSource_->get(propertyId, propertyVal);
}
bool VideoReaderImpl::nextFrame(GpuMat& frame, Stream& stream)
{
if (!internalGrab(frame, stream))
return false;
return true;
}
}
Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename)
Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const std::vector<int>& params, const bool rawMode)
{
CV_Assert( !filename.empty() );
CV_Assert(!filename.empty());
Ptr<VideoSource> videoSource;
try
{
// prefer ffmpeg to cuvidGetSourceVideoFormat() which doesn't always return the correct raw pixel format
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename));
videoSource.reset(new RawVideoSourceWrapper(source));
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, params));
videoSource.reset(new RawVideoSourceWrapper(source, rawMode));
}
catch (...)
{
if (params.size()) throw;
videoSource.reset(new CuvidVideoSource(filename));
}
return makePtr<VideoReaderImpl>(videoSource);
}
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source)
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode)
{
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source));
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, rawMode));
return makePtr<VideoReaderImpl>(videoSource);
}
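The new overloads thread FFmpeg open parameters and the raw-mode flag through to the source. Note the deliberate asymmetry in the fallback: when params is non-empty, a failed FFmpeg open rethrows instead of silently falling back to CuvidVideoSource, which would ignore the parameters. For example:

    // Open with an FFmpeg-side timeout and raw mode enabled; with non-empty
    // params, failure to open via FFmpeg throws rather than falling back to cuvid.
    std::vector<int> params = { cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, 3000 };
    cv::Ptr<cv::cudacodec::VideoReader> reader =
        cv::cudacodec::createVideoReader("rtsp://camera/stream", params, /*rawMode=*/true);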

@ -49,14 +49,15 @@ using namespace cv;
using namespace cv::cudacodec;
using namespace cv::cudacodec::detail;
bool cv::cudacodec::detail::VideoSource::parseVideoData(const unsigned char* data, size_t size, bool endOfStream)
bool cv::cudacodec::detail::VideoSource::parseVideoData(const unsigned char* data, size_t size, const bool rawMode, const bool containsKeyFrame, bool endOfStream)
{
return videoParser_->parseVideoData(data, size, endOfStream);
return videoParser_->parseVideoData(data, size, rawMode, containsKeyFrame, endOfStream);
}
cv::cudacodec::detail::RawVideoSourceWrapper::RawVideoSourceWrapper(const Ptr<RawVideoSource>& source) :
cv::cudacodec::detail::RawVideoSourceWrapper::RawVideoSourceWrapper(const Ptr<RawVideoSource>& source, const bool rawMode) :
source_(source)
{
SetRawMode(rawMode);
CV_Assert( !source_.empty() );
}
@ -65,9 +66,14 @@ cv::cudacodec::FormatInfo cv::cudacodec::detail::RawVideoSourceWrapper::format()
return source_->format();
}
void cv::cudacodec::detail::RawVideoSourceWrapper::updateFormat(const int codedWidth, const int codedHeight)
void cv::cudacodec::detail::RawVideoSourceWrapper::updateFormat(const FormatInfo& videoFormat)
{
source_->updateFormat(codedWidth,codedHeight);
source_->updateFormat(videoFormat);
}
bool cv::cudacodec::detail::RawVideoSourceWrapper::get(const int propertyId, double& propertyVal) const
{
return source_->get(propertyId, propertyVal);
}
void cv::cudacodec::detail::RawVideoSourceWrapper::start()
@ -109,7 +115,19 @@ void cv::cudacodec::detail::RawVideoSourceWrapper::readLoop(void* userData)
break;
}
if (!thiz->parseVideoData(data, size))
bool containsKeyFrame = false;
if (thiz->RawModeEnabled()) {
containsKeyFrame = thiz->source_->lastPacketContainsKeyFrame();
if (!thiz->extraDataQueried) {
thiz->extraDataQueried = true;
Mat extraData;
thiz->source_->getExtraData(extraData);
if(!extraData.empty())
thiz->setExtraData(extraData);
}
}
if (!thiz->parseVideoData(data, size, thiz->RawModeEnabled(), containsKeyFrame))
{
thiz->hasError_ = true;
break;
@ -119,7 +137,7 @@ void cv::cudacodec::detail::RawVideoSourceWrapper::readLoop(void* userData)
break;
}
thiz->parseVideoData(0, 0, true);
thiz->parseVideoData(0, 0, false, false, true);
}
#endif // HAVE_NVCUVID

@ -56,41 +56,51 @@ public:
virtual ~VideoSource() {}
virtual FormatInfo format() const = 0;
virtual void updateFormat(const int codedWidth, const int codedHeight) = 0;
virtual void updateFormat(const FormatInfo& videoFormat) = 0;
virtual bool get(const int propertyId, double& propertyVal) const { return false; }
virtual void start() = 0;
virtual void stop() = 0;
virtual bool isStarted() const = 0;
virtual bool hasError() const = 0;
void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; }
void setExtraData(const cv::Mat& _extraData) {
AutoLock autoLock(mtx_);
extraData = _extraData.clone();
}
void getExtraData(cv::Mat& _extraData) {
AutoLock autoLock(mtx_);
_extraData = extraData.clone();
}
void SetRawMode(const bool enabled) { rawMode_ = enabled; }
bool RawModeEnabled() const { return rawMode_; }
protected:
bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false);
bool parseVideoData(const uchar* data, size_t size, const bool rawMode, const bool containsKeyFrame, bool endOfStream = false);
bool extraDataQueried = false;
private:
detail::VideoParser* videoParser_;
detail::VideoParser* videoParser_ = 0;
cv::Mat extraData;
bool rawMode_ = false;
Mutex mtx_;
};
class RawVideoSourceWrapper : public VideoSource
{
public:
RawVideoSourceWrapper(const Ptr<RawVideoSource>& source);
RawVideoSourceWrapper(const Ptr<RawVideoSource>& source, const bool rawMode);
FormatInfo format() const CV_OVERRIDE;
void updateFormat(const int codedWidth, const int codedHeight) CV_OVERRIDE;
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;
void start() CV_OVERRIDE;
void stop() CV_OVERRIDE;
bool isStarted() const CV_OVERRIDE;
bool hasError() const CV_OVERRIDE;
private:
Ptr<RawVideoSource> source_;
Ptr<Thread> thread_;
static void readLoop(void* userData);
Ptr<RawVideoSource> source_ = 0;
Ptr<Thread> thread_ = 0;
volatile bool stop_;
volatile bool hasError_;
static void readLoop(void* userData);
};
}}}

@ -45,14 +45,132 @@ namespace opencv_test {
namespace {
#if defined(HAVE_NVCUVID) || defined(HAVE_NVCUVENC)
PARAM_TEST_CASE(CheckSet, cv::cuda::DeviceInfo, std::string)
{
};
typedef tuple<std::string, int> check_extra_data_params_t;
PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, check_extra_data_params_t)
{
};
PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
{
};
PARAM_TEST_CASE(VideoReadRaw, cv::cuda::DeviceInfo, std::string)
{
};
PARAM_TEST_CASE(CheckKeyFrame, cv::cuda::DeviceInfo, std::string)
{
};
struct CheckParams : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
}
};
#if defined(HAVE_NVCUVID)
//////////////////////////////////////////////////////
// VideoReader
//==========================================================================
CUDA_TEST_P(CheckSet, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
if (!videoio_registry::hasBackend(CAP_FFMPEG))
throw SkipTestException("FFmpeg backend was not found");
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
double unsupportedVal = -1;
ASSERT_FALSE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NOT_SUPPORTED, unsupportedVal));
double rawModeVal = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawModeVal));
ASSERT_FALSE(rawModeVal);
ASSERT_TRUE(reader->set(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE,true));
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawModeVal));
ASSERT_TRUE(rawModeVal);
bool rawPacketsAvailable = false;
while (reader->grab()) {
double nRawPackages = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB, nRawPackages));
if (nRawPackages > 0) {
rawPacketsAvailable = true;
break;
}
}
ASSERT_TRUE(rawPacketsAvailable);
}
CUDA_TEST_P(CheckExtraData, Reader)
{
// RTSP streaming is only supported by the FFmpeg back end
if (!videoio_registry::hasBackend(CAP_FFMPEG))
throw SkipTestException("FFmpeg backend not found");
cv::cuda::setDevice(GET_PARAM(0).deviceID());
const string path = get<0>(GET_PARAM(1));
const int sz = get<1>(GET_PARAM(1));
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + path;
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, true);
double rawModeVal = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawModeVal));
ASSERT_TRUE(rawModeVal);
double extraDataIdx = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_EXTRA_DATA_INDEX, extraDataIdx));
ASSERT_EQ(extraDataIdx, 1);
ASSERT_TRUE(reader->grab());
cv::Mat extraData;
const bool newData = reader->retrieve(extraData, extraDataIdx);
ASSERT_TRUE((newData && sz) || (!newData && !sz));
ASSERT_EQ(extraData.total(), sz);
}
CUDA_TEST_P(CheckKeyFrame, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
// RTSP streaming is only supported by the FFmpeg back end
if (!videoio_registry::hasBackend(CAP_FFMPEG))
throw SkipTestException("FFmpeg backend not found");
const string path = GET_PARAM(1);
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + path;
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, true);
double rawModeVal = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawModeVal));
ASSERT_TRUE(rawModeVal);
double rawIdxBase = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX, rawIdxBase));
ASSERT_EQ(rawIdxBase, 2);
constexpr int maxNPackagesToCheck = 2;
int nPackages = 0;
while (nPackages < maxNPackagesToCheck) {
ASSERT_TRUE(reader->grab());
double N = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
for (int i = rawIdxBase; i < N + rawIdxBase; i++) {
nPackages++;
double containsKeyFrame = i;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME, containsKeyFrame));
ASSERT_TRUE((nPackages == 1 && containsKeyFrame) || (nPackages == 2 && !containsKeyFrame)) << "nPackage: " << i;
if (nPackages >= maxNPackagesToCheck)
break;
}
}
}
CUDA_TEST_P(Video, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
@ -74,6 +192,103 @@ CUDA_TEST_P(Video, Reader)
ASSERT_FALSE(frame.empty());
}
}
CUDA_TEST_P(VideoReadRaw, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
// RTSP streaming is only supported by the FFmpeg back end
if (!videoio_registry::hasBackend(CAP_FFMPEG))
throw SkipTestException("FFmpeg backend not found");
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
const string fileNameOut = tempfile("test_container_stream");
{
std::ofstream file(fileNameOut, std::ios::binary);
ASSERT_TRUE(file.is_open());
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, true);
double rawModeVal = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawModeVal));
ASSERT_TRUE(rawModeVal);
double rawIdxBase = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX, rawIdxBase));
ASSERT_EQ(rawIdxBase, 2);
cv::cuda::GpuMat frame;
for (int i = 0; i < 100; i++)
{
ASSERT_TRUE(reader->grab());
ASSERT_TRUE(reader->retrieve(frame));
ASSERT_FALSE(frame.empty());
double N = -1;
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
ASSERT_TRUE(N >= 0) << N << " < 0";
for (int j = rawIdxBase; j < N + rawIdxBase; j++) {
Mat rawPackets;
reader->retrieve(rawPackets, j);
file.write((char*)rawPackets.data, rawPackets.total());
}
}
}
std::cout << "Checking written video stream: " << fileNameOut << std::endl;
{
cv::Ptr<cv::cudacodec::VideoReader> readerReference = cv::cudacodec::createVideoReader(inputFile);
cv::Ptr<cv::cudacodec::VideoReader> readerActual = cv::cudacodec::createVideoReader(fileNameOut, {}, true);
double decodedFrameIdx = -1;
ASSERT_TRUE(readerActual->get(cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX, decodedFrameIdx));
ASSERT_EQ(decodedFrameIdx, 0);
cv::cuda::GpuMat reference, actual;
cv::Mat referenceHost, actualHost;
for (int i = 0; i < 100; i++)
{
ASSERT_TRUE(readerReference->nextFrame(reference));
ASSERT_TRUE(readerActual->grab());
ASSERT_TRUE(readerActual->retrieve(actual, decodedFrameIdx));
actual.download(actualHost);
reference.download(referenceHost);
ASSERT_TRUE(cvtest::norm(actualHost, referenceHost, NORM_INF) == 0);
}
}
ASSERT_EQ(0, remove(fileNameOut.c_str()));
}
CUDA_TEST_P(CheckParams, Reader)
{
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
{
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
double msActual = -1;
ASSERT_FALSE(reader->get(cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, msActual));
}
{
constexpr int msReference = 3333;
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {
cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, msReference });
double msActual = -1;
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, msActual));
ASSERT_EQ(msActual, msReference);
}
{
std::vector<bool> exceptionsThrown = { false,true };
std::vector<int> capPropFormats = { -1,0 };
for (int i = 0; i < capPropFormats.size(); i++) {
bool exceptionThrown = false;
try {
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {
cv::VideoCaptureProperties::CAP_PROP_FORMAT, capPropFormats.at(i) });
}
catch (const cv::Exception& ex) {
if (ex.code == Error::StsUnsupportedFormat)
exceptionThrown = true;
}
ASSERT_EQ(exceptionThrown, exceptionsThrown.at(i));
}
}
}
#endif // HAVE_NVCUVID
#if defined(_WIN32) && defined(HAVE_NVCUVENC)
@ -125,11 +340,37 @@ CUDA_TEST_P(Video, Writer)
#endif // _WIN32, HAVE_NVCUVENC
#define VIDEO_SRC "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
ALL_DEVICES,
testing::Values("highgui/video/big_buck_bunny.mp4")));
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
"highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg"
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Video, testing::Combine(
ALL_DEVICES,
testing::Values(VIDEO_SRC)));
testing::Values(VIDEO_SRC_R)));
#define VIDEO_SRC_RW "highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265"
INSTANTIATE_TEST_CASE_P(CUDA_Codec, VideoReadRaw, testing::Combine(
ALL_DEVICES,
testing::Values(VIDEO_SRC_RW)));
const check_extra_data_params_t check_extra_data_params[] =
{
check_extra_data_params_t("highgui/video/big_buck_bunny.mp4", 45),
check_extra_data_params_t("highgui/video/big_buck_bunny.mov", 45),
check_extra_data_params_t("highgui/video/big_buck_bunny.mjpg.avi", 0)
};
INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckExtraData, testing::Combine(
ALL_DEVICES,
testing::ValuesIn(check_extra_data_params)));
INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckKeyFrame, testing::Combine(
ALL_DEVICES,
testing::Values(VIDEO_SRC_R)));
INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckParams, ALL_DEVICES);
#endif // HAVE_NVCUVID || HAVE_NVCUVENC
}} // namespace

@ -314,15 +314,11 @@ private:
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI()
{
std::lock_guard<std::mutex> lock(m_lock);
return m_ofAPI.get();
}
NvOFHandle GetHandle() { return m_hOF; }
protected:
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream);
@ -338,6 +334,8 @@ public:
int gridSize, InputOutputArray upsampledFlow);
virtual int getGridSize() const { return m_gridSize; }
~NvidiaOpticalFlowImpl();
};
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
@ -599,20 +597,24 @@ void NvidiaOpticalFlowImpl::collectGarbage()
if (m_hInputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer));
m_hInputBuffer = nullptr;
}
if (m_hReferenceBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer));
m_hReferenceBuffer = nullptr;
}
if (m_hOutputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer));
m_hOutputBuffer = nullptr;
}
if (m_enableExternalHints)
{
if (m_hHintBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer));
m_hHintBuffer = nullptr;
}
}
if (m_enableCostBuffer)
@ -620,6 +622,7 @@ void NvidiaOpticalFlowImpl::collectGarbage()
if (m_hCostBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
m_hCostBuffer = nullptr;
}
}
if (m_inputStream)
@ -633,9 +636,15 @@ void NvidiaOpticalFlowImpl::collectGarbage()
if (m_hOF)
{
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
m_hOF = nullptr;
}
}
NvidiaOpticalFlowImpl::~NvidiaOpticalFlowImpl()
{
collectGarbage();
}
void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, cv::Size imageSize,
int gridSize, InputOutputArray upsampledFlow)
{
@ -754,15 +763,11 @@ private:
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI()
{
std::lock_guard<std::mutex> lock(m_lock);
return m_ofAPI.get();
}
NvOFHandle GetHandle() { return m_hOF; }
protected:
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl_2(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset,
NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
@ -778,6 +783,8 @@ public:
virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow);
virtual int getGridSize() const { return m_gridSize; }
~NvidiaOpticalFlowImpl_2();
};
NvidiaOpticalFlowImpl_2::NvidiaOpticalFlowImpl_2(
@ -1149,24 +1156,29 @@ void NvidiaOpticalFlowImpl_2::collectGarbage()
if (m_hInputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer));
m_hInputBuffer = nullptr;
}
if (m_hReferenceBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer));
m_hReferenceBuffer = nullptr;
}
if (m_hOutputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer));
m_hOutputBuffer = nullptr;
}
if (m_scaleFactor > 1 && m_hOutputUpScaledBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputUpScaledBuffer));
m_hOutputUpScaledBuffer = nullptr;
}
if (m_enableExternalHints)
{
if (m_hHintBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer));
m_hHintBuffer = nullptr;
}
}
if (m_enableCostBuffer)
@ -1174,6 +1186,7 @@ void NvidiaOpticalFlowImpl_2::collectGarbage()
if (m_hCostBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
m_hCostBuffer = nullptr;
}
}
if (m_inputStream)
@ -1187,9 +1200,15 @@ void NvidiaOpticalFlowImpl_2::collectGarbage()
if (m_hOF)
{
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
m_hOF = nullptr;
}
}
NvidiaOpticalFlowImpl_2::~NvidiaOpticalFlowImpl_2()
{
collectGarbage();
}
void NvidiaOpticalFlowImpl_2::convertToFloat(InputArray _flow, InputOutputArray floatFlow)
{
Mat flow;

@ -162,7 +162,9 @@ namespace
GpuMat p32_buf;
GpuMat diff_buf;
GpuMat norm_buf;
GpuMat diff_sum_dev;
Mat diff_sum_host;
};
void OpticalFlowDual_TVL1_Impl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
@ -361,8 +363,11 @@ namespace
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast<float>(theta_), gamma_, calcError, stream);
if (calcError)
{
cuda::calcSum(diff, diff_sum_dev, cv::noArray(), _stream);
diff_sum_dev.download(diff_sum_host, _stream);
_stream.waitForCompletion();
error = cuda::sum(diff, norm_buf)[0];
error = diff_sum_host.at<double>(0,0);
prevError = error;
}
else

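The TVL1 change above swaps cuda::sum(), which synchronizes the device on every call, for a stream-ordered reduction: cuda::calcSum() leaves the scalar on the device, the download is queued on the same stream, and waitForCompletion() becomes the single synchronization point. The pattern in isolation (a sketch; diff is assumed to already hold the per-pixel residuals):

    // Stream-ordered reduction sketch: no implicit sync until the host reads it.
    cv::cuda::Stream stream;
    cv::cuda::GpuMat diff;   // assumed already filled with CV_32F residuals
    cv::cuda::GpuMat sumDev; // 1x1 CV_64F result stays on the device
    cv::Mat sumHost;
    cv::cuda::calcSum(diff, sumDev, cv::noArray(), stream);
    sumDev.download(sumHost, stream); // asynchronous, ordered after the reduction
    stream.waitForCompletion();       // the only host-device sync point
    double error = sumHost.at<double>(0, 0);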
@ -605,7 +605,6 @@ CUDA_TEST_P(NvidiaOpticalFlow_2_0, Regression)
ASSERT_FALSE(golden.empty());
EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
d_nvof->collectGarbage();
}
CUDA_TEST_P(NvidiaOpticalFlow_2_0, OpticalFlowNan)
@ -640,7 +639,6 @@ CUDA_TEST_P(NvidiaOpticalFlow_2_0, OpticalFlowNan)
EXPECT_TRUE(cv::checkRange(flowx));
EXPECT_TRUE(cv::checkRange(flowy));
d_nvof->collectGarbage();
};
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_2_0, ALL_DEVICES);

@ -87,6 +87,45 @@ PERF_TEST_P(ImagePair, StereoBM,
}
}
PERF_TEST_P(ImagePair, StereoBMwithUniqueness,
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 256;
if (PERF_RUN_CUDA())
{
cv::Ptr<cv::StereoBM> d_bm = cv::cuda::createStereoBM(ndisp);
d_bm->setUniquenessRatio(10);
const cv::cuda::GpuMat d_imgLeft(imgLeft);
const cv::cuda::GpuMat d_imgRight(imgRight);
cv::cuda::GpuMat dst;
TEST_CYCLE() d_bm->compute(d_imgLeft, d_imgRight, dst);
CUDA_SANITY_CHECK(dst);
}
else
{
cv::Ptr<cv::StereoBM> bm = cv::StereoBM::create(ndisp);
bm->setUniquenessRatio(10);
cv::Mat dst;
TEST_CYCLE() bm->compute(imgLeft, imgRight, dst);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation

@ -43,6 +43,7 @@
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include <limits.h>
namespace cv { namespace cuda { namespace device
{
@ -60,18 +61,13 @@ namespace cv { namespace cuda { namespace device
#define STEREO_MIND 0 // The minimum d range to check
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
__constant__ unsigned int* cminSSDImage;
__constant__ size_t cminSSD_step;
__constant__ int cwidth;
__constant__ int cheight;
__device__ __forceinline__ int SQ(int a)
{
return a * a;
}
template<int RADIUS>
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd, const int X)
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd, const int X, int cwidth)
{
unsigned int cache = 0;
unsigned int cache2 = 0;
@ -80,9 +76,8 @@ namespace cv { namespace cuda { namespace device
{
for(int i = 1; i <= RADIUS; i++)
cache += col_ssd[i];
col_ssd_cache[0] = cache;
}
col_ssd_cache[0] = cache;
__syncthreads();
@ -99,26 +94,24 @@ namespace cv { namespace cuda { namespace device
}
template<int RADIUS>
__device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd, const int X)
__device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd, const int X, int cwidth, unsigned int* ssd)
{
unsigned int ssd[N_DISPARITIES];
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
ssd[0] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * RADIUS), X);
ssd[0] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[1] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * RADIUS), X);
ssd[1] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[2] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * RADIUS), X);
ssd[2] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[3] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * RADIUS), X);
ssd[3] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[4] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * RADIUS), X);
ssd[4] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[5] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * RADIUS), X);
ssd[5] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[6] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * RADIUS), X);
ssd[6] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * RADIUS), X, cwidth);
__syncthreads();
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS), X);
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS), X, cwidth);
int mssd = ::min(::min(::min(ssd[0], ssd[1]), ::min(ssd[4], ssd[5])), ::min(::min(ssd[2], ssd[3]), ::min(ssd[6], ssd[7])));
@ -235,26 +228,27 @@ namespace cv { namespace cuda { namespace device
}
template<int RADIUS>
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp)
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp,
int uniquenessRatio, unsigned int* cminSSDImage, size_t cminSSD_step, int cwidth, int cheight)
{
extern __shared__ unsigned int col_ssd_cache[];
uint line_ssds[2 + N_DISPARITIES]; // +2 - tail of previous batch for accurate uniquenessRatio check
uint* batch_ssds = line_ssds + 2;
uint line_ssd_tails[3*ROWSperTHREAD];
uchar uniqueness_approved[ROWSperTHREAD];
uchar local_disparity[ROWSperTHREAD];
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
volatile unsigned int *col_ssd_extra = threadIdx.x < (2 * RADIUS) ? col_ssd + BLOCK_W : 0; //#define N_DIRTY_PIXELS (2 * RADIUS)
volatile unsigned int *col_ssd_extra = threadIdx.x < (2 * RADIUS) ? col_ssd + BLOCK_W : 0;
//#define X (blockIdx.x * BLOCK_W + threadIdx.x + STEREO_MAXD)
int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp + RADIUS);
//#define Y (__mul24(blockIdx.y, ROWSperTHREAD) + RADIUS)
#define Y (blockIdx.y * ROWSperTHREAD + RADIUS)
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
const int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp + RADIUS);
const int Y = (blockIdx.y * ROWSperTHREAD + RADIUS);
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
unsigned char* disparImage = disp.data + X + Y * disp.step;
//if (X < cwidth)
//{
// unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
// for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
// *ptr = 0xFFFFFFFF;
//}
float thresh_scale;
int end_row = ::min(ROWSperTHREAD, cheight - Y - RADIUS);
int y_tex;
int x_tex = X - RADIUS;
@ -262,6 +256,25 @@ namespace cv { namespace cuda { namespace device
if (x_tex >= cwidth)
return;
for(int i = 0; i < ROWSperTHREAD; i++)
local_disparity[i] = 0;
for(int i = 0; i < 3*ROWSperTHREAD; i++)
{
line_ssd_tails[i] = UINT_MAX;
}
if (uniquenessRatio > 0)
{
batch_ssds[6] = UINT_MAX;
batch_ssds[7] = UINT_MAX;
thresh_scale = (1.0 + uniquenessRatio / 100.0f);
for(int i = 0; i < ROWSperTHREAD; i++)
{
uniqueness_approved[i] = 1;
}
}
for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
{
y_tex = Y - RADIUS;
@ -276,10 +289,10 @@ namespace cv { namespace cuda { namespace device
if (Y < cheight - RADIUS)
{
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd, X);
uint2 batch_opt = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd, X, cwidth, batch_ssds);
// For threads that do not satisfy the if condition below("X < cwidth - RADIUS"), previously
// computed "minSSD" value, which is the result of "MinSSD" function call, is not used at all.
// computed "batch_opt" value, which is the result of "MinSSD" function call, is not used at all.
//
// However, since the "MinSSD" function has "__syncthreads" call in its body, those threads
// must also call "MinSSD" to avoid deadlock. (#13850)
@ -290,10 +303,50 @@ namespace cv { namespace cuda { namespace device
if (X < cwidth - RADIUS)
{
if (minSSD.x < minSSDImage[0])
unsigned int last_opt = line_ssd_tails[3*0 + 0];
unsigned int opt = ::min(last_opt, batch_opt.x);
if (uniquenessRatio > 0)
{
line_ssds[0] = line_ssd_tails[3*0 + 1];
line_ssds[1] = line_ssd_tails[3*0 + 2];
float thresh = thresh_scale * opt;
int dtest = local_disparity[0];
if(batch_opt.x < last_opt)
{
uniqueness_approved[0] = 1;
dtest = d + batch_opt.y;
if ((local_disparity[0] < dtest-1 || local_disparity[0] > dtest+1) && (last_opt <= thresh))
{
uniqueness_approved[0] = 0;
}
}
if(uniqueness_approved[0])
{
// splitting this into two loops (dropping the ld vs dtest comparison) makes the
// uniqueness check dramatically slower, at least on a GTX 1080
for (int ld = d-2; ld < d + N_DISPARITIES; ld++)
{
if ((ld < dtest-1 || ld > dtest+1) && (line_ssds[ld-d+2] <= thresh))
{
uniqueness_approved[0] = 0;
break;
}
}
}
line_ssd_tails[3*0 + 1] = batch_ssds[6];
line_ssd_tails[3*0 + 2] = batch_ssds[7];
}
line_ssd_tails[3*0 + 0] = opt;
if (batch_opt.x < last_opt)
{
disparImage[0] = (unsigned char)(d + minSSD.y);
minSSDImage[0] = minSSD.x;
local_disparity[0] = (unsigned char)(d + batch_opt.y);
}
}
}
@ -313,14 +366,13 @@ namespace cv { namespace cuda { namespace device
y_tex += 1;
__syncthreads(); //before MinSSD function
__syncthreads();
if (row < cheight - RADIUS - Y)
{
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd, X);
uint2 batch_opt = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd, X, cwidth, batch_ssds);
// For threads that do not satisfy the if condition below("X < cwidth - RADIUS"), previously
// computed "minSSD" value, which is the result of "MinSSD" function call, is not used at all.
// computed "batch_opt" value, which is the result of "MinSSD" function call, is not used at all.
//
// However, since the "MinSSD" function has "__syncthreads" call in its body, those threads
// must also call "MinSSD" to avoid deadlock. (#13850)
@ -331,11 +383,47 @@ namespace cv { namespace cuda { namespace device
if (X < cwidth - RADIUS)
{
int idx = row * cminSSD_step;
if (minSSD.x < minSSDImage[idx])
unsigned int last_opt = line_ssd_tails[3*row + 0];
unsigned int opt = ::min(last_opt, batch_opt.x);
if (uniquenessRatio > 0)
{
disparImage[disp.step * row] = (unsigned char)(d + minSSD.y);
minSSDImage[idx] = minSSD.x;
line_ssds[0] = line_ssd_tails[3*row + 1];
line_ssds[1] = line_ssd_tails[3*row + 2];
float thresh = thresh_scale * opt;
int dtest = local_disparity[row];
if(batch_opt.x < last_opt)
{
uniqueness_approved[row] = 1;
dtest = d + batch_opt.y;
if ((local_disparity[row] < dtest-1 || local_disparity[row] > dtest+1) && (last_opt <= thresh))
{
uniqueness_approved[row] = 0;
}
}
if(uniqueness_approved[row])
{
for (int ld = 0; ld < N_DISPARITIES + 2; ld++)
{
if (((d+ld-2 < dtest-1) || (d+ld-2 > dtest+1)) && (line_ssds[ld] <= thresh))
{
uniqueness_approved[row] = 0;
break;
}
}
}
line_ssd_tails[3*row + 1] = batch_ssds[6];
line_ssd_tails[3*row + 2] = batch_ssds[7];
}
line_ssd_tails[3*row + 0] = opt;
if (batch_opt.x < last_opt)
{
local_disparity[row] = (unsigned char)(d + batch_opt.y);
}
}
}
@ -344,10 +432,32 @@ namespace cv { namespace cuda { namespace device
__syncthreads(); // before initializing shared memory at the beginning of next loop
} // for d loop
}
for (int row = 0; row < end_row; row++)
{
minSSDImage[row * cminSSD_step] = line_ssd_tails[3*row + 0];
}
if (uniquenessRatio > 0)
{
for (int row = 0; row < end_row; row++)
{
// drop disparity for pixel where uniqueness requirement was not satisfied (zero value)
disparImage[disp.step * row] = local_disparity[row] * uniqueness_approved[row];
}
}
else
{
for (int row = 0; row < end_row; row++)
{
disparImage[disp.step * row] = local_disparity[row];
}
}
}
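The uniqueness test follows the CPU StereoBM rule: a winning disparity is rejected when any other candidate outside best_d ± 1 has an SSD within (1 + uniquenessRatio/100) of the minimum; line_ssd_tails carries the last two SSDs of the previous batch so candidates straddling a batch boundary are still checked. The rule, restated as a scalar host-side sketch rather than the kernel code:

    // Scalar restatement of the uniqueness rule (sketch).
    bool uniquenessApproved(const std::vector<unsigned int>& ssd, int bestD, int uniquenessRatio)
    {
        const float thresh = ssd[bestD] * (1.0f + uniquenessRatio / 100.0f);
        for (int d = 0; d < (int)ssd.size(); ++d)
        {
            if ((d < bestD - 1 || d > bestD + 1) && ssd[d] <= thresh)
                return false; // a non-neighbouring disparity is nearly as good: ambiguous
        }
        return true;
    }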
template<int RADIUS> void kernel_caller(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream)
template<int RADIUS> void kernel_caller(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp,
int maxdisp, int uniquenessRatio, unsigned int* missd_buffer,
size_t minssd_step, int cwidth, int cheight, cudaStream_t & stream)
{
dim3 grid(1,1,1);
dim3 threads(BLOCK_W, 1, 1);
@ -358,14 +468,17 @@ namespace cv { namespace cuda { namespace device
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp, uniquenessRatio,
missd_buffer, minssd_step, cwidth, cheight);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
};
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp,
int maxdisp, int uniquenessRatio, unsigned int* missd_buffer,
size_t minssd_step, int cwidth, int cheight, cudaStream_t & stream);
const static kernel_caller_t callers[] =
{
@ -380,46 +493,40 @@ namespace cv { namespace cuda { namespace device
};
const int calles_num = sizeof(callers)/sizeof(callers[0]);
void stereoBM_CUDA(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t& stream)
void stereoBM_CUDA(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp,
int winsz, int uniquenessRatio, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t& stream)
{
int winsz2 = winsz >> 1;
if (winsz2 == 0 || winsz2 >= calles_num)
CV_Error(cv::Error::StsBadArg, "Unsupported window size");
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
cudaSafeCall( cudaMemset2DAsync(disp.data, disp.step, 0, disp.cols, disp.rows, stream) );
cudaSafeCall( cudaMemset2DAsync(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows, stream) );
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
callers[winsz2](left, right, disp, maxdisp, uniquenessRatio, minSSD_buf.data, minssd_step, left.cols, left.rows, stream);
}
callers[winsz2](left, right, disp, maxdisp, stream);
__device__ inline int clamp(int x, int a, int b)
{
return ::max(a, ::min(b, x));
}
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Sobel Prefilter //////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
__global__ void prefilter_kernel(PtrStepSzb output, int prefilterCap)
__global__ void prefilter_kernel_xsobel(PtrStepSzb input, PtrStepSzb output, int prefilterCap)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < output.cols && y < output.rows)
{
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
int conv = input.ptr(::max(0,y-1))[::max(0,x-1)] * (-1) + input.ptr(::max(0, y-1))[::min(x+1, input.cols-1)] * (1) +
input.ptr(y )[::max(0,x-1)] * (-2) + input.ptr(y )[::min(x+1, input.cols-1)] * (2) +
input.ptr(::min(y+1, input.rows-1))[::max(0,x-1)] * (-1) + input.ptr(::min(y+1, input.rows-1))[::min(x+1,input.cols-1)] * (1);
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
output.ptr(y)[x] = conv & 0xFF;
@ -428,22 +535,65 @@ namespace cv { namespace cuda { namespace device
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(input.cols, threads.x);
grid.y = divUp(input.rows, threads.y);
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
prefilter_kernel_xsobel<<<grid, threads, 0, stream>>>(input, output, prefilterCap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Norm Prefilter ///////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
__global__ void prefilter_kernel_norm(PtrStepSzb input, PtrStepSzb output, int prefilterCap, int scale_g, int scale_s, int winsize)
{
// prefilterCap in range 1..63, checked in StereoBMImpl::compute
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int cols = input.cols;
int rows = input.rows;
int WSZ2 = winsize / 2;
if(x < cols && y < rows)
{
int cov1 = input.ptr(::max(y-1, 0))[x] * 1 +
input.ptr(y)[::max(x-1, 0)] * 1 + input.ptr(y)[x] * 4 + input.ptr(y)[::min(x+1, cols-1)] * 1 +
input.ptr(::min(y+1, rows-1))[x] * 1;
int cov2 = 0;
for(int i = -WSZ2; i < WSZ2+1; i++)
for(int j = -WSZ2; j < WSZ2+1; j++)
cov2 += input.ptr(clamp(y+i, 0, rows-1))[clamp(x+j, 0, cols-1)];
int res = (cov1*scale_g - cov2*scale_s)>>10;
res = clamp(res, -prefilterCap, prefilterCap) + prefilterCap;
output.ptr(y)[x] = res;
}
}
void prefilter_norm(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, int winsize, cudaStream_t & stream)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(input.cols, threads.x);
grid.y = divUp(input.rows, threads.y);
int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
scale_g *= scale_s;
prefilter_kernel_norm<<<grid, threads, 0, stream>>>(input, output, prefilterCap, scale_g, scale_s, winsize);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
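The fixed-point scales are easier to follow with a worked example; for winsize = 9, with integer division throughout:

    // Worked example for winsize = 9 (sketch):
    int winsize = 9;
    int scale_g = winsize * winsize / 8;            // 81 / 8   = 10
    int scale_s = (1024 + scale_g) / (scale_g * 2); // 1034 / 20 = 51
    scale_g *= scale_s;                             // 10 * 51  = 510
    // res = (cov1*510 - cov2*51) >> 10 is roughly (10*cov1 - cov2) / 20.
    // cov1 weights the 5-pixel cross {1,1,4,1,1} (sum 8) and cov2 sums the 9x9
    // window, so res approximates 4 * (centre average - window mean) before the
    // clamp to [-prefilterCap, prefilterCap] and the shift into [0, 2*prefilterCap].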

@ -55,8 +55,9 @@ namespace cv { namespace cuda { namespace device
{
namespace stereobm
{
void stereoBM_CUDA(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
void stereoBM_CUDA(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, int uniquenessRatio, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
void prefilter_norm(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, int winsize, cudaStream_t & stream);
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream);
}
}}}
@ -92,8 +93,8 @@ namespace
int getPreFilterType() const { return preset_; }
void setPreFilterType(int preFilterType) { preset_ = preFilterType; }
int getPreFilterSize() const { return 0; }
void setPreFilterSize(int /*preFilterSize*/) {}
int getPreFilterSize() const { return preFilterSize_; }
void setPreFilterSize(int preFilterSize) { preFilterSize_ = preFilterSize; }
int getPreFilterCap() const { return preFilterCap_; }
void setPreFilterCap(int preFilterCap) { preFilterCap_ = preFilterCap; }
@ -101,8 +102,8 @@ namespace
int getTextureThreshold() const { return static_cast<int>(avergeTexThreshold_); }
void setTextureThreshold(int textureThreshold) { avergeTexThreshold_ = static_cast<float>(textureThreshold); }
int getUniquenessRatio() const { return 0; }
void setUniquenessRatio(int /*uniquenessRatio*/) {}
int getUniquenessRatio() const { return uniquenessRatio_; }
void setUniquenessRatio(int uniquenessRatio) { uniquenessRatio_ = uniquenessRatio; }
int getSmallerBlockSize() const { return 0; }
void setSmallerBlockSize(int /*blockSize*/){}
@ -119,12 +120,14 @@ namespace
int winSize_;
int preFilterCap_;
float avergeTexThreshold_;
int preFilterSize_;
int uniquenessRatio_;
GpuMat minSSD_, leBuf_, riBuf_;
};
StereoBMImpl::StereoBMImpl(int numDisparities, int blockSize)
: preset_(0), ndisp_(numDisparities), winSize_(blockSize), preFilterCap_(31), avergeTexThreshold_(3)
: preset_(-1), ndisp_(numDisparities), winSize_(blockSize), preFilterCap_(31), avergeTexThreshold_(3), preFilterSize_(9), uniquenessRatio_(0)
{
}
@ -169,8 +172,19 @@ namespace
le_for_bm = leBuf_;
ri_for_bm = riBuf_;
}
else if(preset_ == cv::StereoBM::PREFILTER_NORMALIZED_RESPONSE)
{
cuda::ensureSizeIsEnough(left.size(), left.type(), leBuf_);
cuda::ensureSizeIsEnough(right.size(), right.type(), riBuf_);
prefilter_norm( left, leBuf_, preFilterCap_, preFilterSize_, stream);
prefilter_norm(right, riBuf_, preFilterCap_, preFilterSize_, stream);
le_for_bm = leBuf_;
ri_for_bm = riBuf_;
}
stereoBM_CUDA(le_for_bm, ri_for_bm, disparity, ndisp_, winSize_, minSSD_, stream);
stereoBM_CUDA(le_for_bm, ri_for_bm, disparity, ndisp_, winSize_, uniquenessRatio_, minSSD_, stream);
if (avergeTexThreshold_ > 0)
postfilter_textureness(le_for_bm, winSize_, avergeTexThreshold_, disparity, stream);
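With these changes the CUDA StereoBM exposes the same normalized-response prefilter and uniqueness-ratio knobs as the CPU implementation. Typical use, mirroring the new regression tests (a sketch; the input images are assumed to be CV_8UC1 and already on the device):

    // Usage sketch of the newly honoured settings.
    cv::Ptr<cv::cuda::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
    bm->setPreFilterType(cv::StereoBM::PREFILTER_NORMALIZED_RESPONSE);
    bm->setPreFilterSize(9);
    bm->setUniquenessRatio(15); // 0 disables the uniqueness check
    cv::cuda::GpuMat left, right, disp; // assumed filled with CV_8UC1 images
    bm->compute(left, right, disp);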

@ -79,6 +79,84 @@ CUDA_TEST_P(StereoBM, Regression)
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
CUDA_TEST_P(StereoBM, PrefilterXSobelRegression)
{
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
cv::Mat disp_gold = readImage("stereobm/aloe-disp-prefilter-xsobel.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
cv::cuda::GpuMat disp;
bm->setPreFilterType(cv::StereoBM::PREFILTER_XSOBEL);
bm->compute(loadMat(left_image), loadMat(right_image), disp);
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
CUDA_TEST_P(StereoBM, PrefilterNormRegression)
{
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
cv::Mat disp_gold = readImage("stereobm/aloe-disp-prefilter-norm.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
cv::cuda::GpuMat disp;
bm->setPreFilterType(cv::StereoBM::PREFILTER_NORMALIZED_RESPONSE);
bm->setPreFilterSize(9);
bm->compute(loadMat(left_image), loadMat(right_image), disp);
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
CUDA_TEST_P(StereoBM, Streams)
{
cv::cuda::Stream stream;
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
cv::Mat disp_gold = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::Ptr<cv::cuda::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
cv::cuda::GpuMat disp;
bm->compute(loadMat(left_image), loadMat(right_image), disp, stream);
stream.waitForCompletion();
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
CUDA_TEST_P(StereoBM, Uniqueness_Regression)
{
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
cv::Mat disp_gold = readImage("stereobm/aloe-disp-uniqueness15.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
cv::cuda::GpuMat disp;
bm->setUniquenessRatio(15);
bm->compute(loadMat(left_image), loadMat(right_image), disp);
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Stereo, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////////

@ -53,55 +53,75 @@ namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
__constant__ float c_warpMat[3 * 3];
struct AffineTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
static const int rows = 2;
static __device__ __forceinline__ float2 calcCoord(const float warpMat[AffineTransform::rows * 3], int x, int y)
{
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
const float xcoo = warpMat[0] * x + warpMat[1] * y + warpMat[2];
const float ycoo = warpMat[3] * x + warpMat[4] * y + warpMat[5];
return make_float2(xcoo, ycoo);
}
struct Coefficients
{
Coefficients(const float* c_)
{
for(int i = 0; i < AffineTransform::rows * 3; i++)
c[i] = c_[i];
}
float c[AffineTransform::rows * 3];
};
};
struct PerspectiveTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
static const int rows = 3;
static __device__ __forceinline__ float2 calcCoord(const float warpMat[PerspectiveTransform::rows * 3], int x, int y)
{
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
const float coeff = 1.0f / (warpMat[6] * x + warpMat[7] * y + warpMat[8]);
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
const float xcoo = coeff * (warpMat[0] * x + warpMat[1] * y + warpMat[2]);
const float ycoo = coeff * (warpMat[3] * x + warpMat[4] * y + warpMat[5]);
return make_float2(xcoo, ycoo);
}
struct Coefficients
{
Coefficients(const float* c_)
{
for(int i = 0; i < PerspectiveTransform::rows * 3; i++)
c[i] = c_[i];
}
float c[PerspectiveTransform::rows * 3];
};
};
///////////////////////////////////////////////////////////////////
// Build Maps
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap, const typename Transform::Coefficients warpMat)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < xmap.cols && y < xmap.rows)
{
const float2 coord = Transform::calcCoord(x, y);
const float2 coord = Transform::calcCoord(warpMat.c, x, y);
xmap(y, x) = coord.x;
ymap(y, x) = coord.y;
}
}
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, const float warpMat[Transform::rows * 3], cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap, warpMat);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
@ -110,29 +130,25 @@ namespace cv { namespace cuda { namespace device
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
buildWarpMaps_caller<AffineTransform>(xmap, ymap, coeffs, stream);
}
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, coeffs, stream);
}
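Passing the matrix as a kernel argument, wrapped in the per-transform Coefficients POD, replaces the former __constant__ c_warpMat and its cudaMemcpyToSymbol call. The symbol was hidden global state shared by every launch, so concurrent warps on different streams could race; a by-value parameter travels with each launch instead. The pattern in isolation (a toy sketch, not the cudawarping code):

    // Toy sketch: a small POD kernel argument replaces a __constant__ symbol,
    // so each stream's launch carries its own coefficients race-free.
    struct Coeffs { float c[9]; };

    __global__ void applyWarp(float* out, int n, const Coeffs w)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            out[i] = w.c[0] * i + w.c[2]; // toy use of two coefficients
    }

    // Host side: concurrent launches no longer share mutable global state.
    //   applyWarp<<<grid, block, 0, streamA>>>(dA, n, coeffsA);
    //   applyWarp<<<grid, block, 0, streamB>>>(dB, n, coeffsB);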
///////////////////////////////////////////////////////////////////
// Warp
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst, const typename Transform::Coefficients warpMat)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float2 coord = Transform::calcCoord(x, y);
const float2 coord = Transform::calcCoord(warpMat.c, x, y);
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
}
@ -140,7 +156,7 @@ namespace cv { namespace cuda { namespace device
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, const float warpMat[Transform::rows*3], cudaStream_t stream, bool)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
@ -151,14 +167,14 @@ namespace cv { namespace cuda { namespace device
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst, warpMat);
cudaSafeCall( cudaGetLastError() );
}
};
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, const float warpMat[Transform::rows*3], bool)
{
CV_UNUSED(xoff);
CV_UNUSED(yoff);
@ -173,7 +189,7 @@ namespace cv { namespace cuda { namespace device
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block>>>(filter_src, dst);
warp<Transform><<<grid, block>>>(filter_src, dst, warpMat);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -195,7 +211,7 @@ namespace cv { namespace cuda { namespace device
}; \
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, const float warpMat[Transform::rows*3], bool cc20) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc20 ? 8 : 4); \
@ -205,14 +221,14 @@ namespace cv { namespace cuda { namespace device
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
warp<Transform><<<grid, block>>>(filter_src, dst, warpMat); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, const float warpMat[Transform::rows*3], bool) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
@ -221,14 +237,14 @@ namespace cv { namespace cuda { namespace device
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
warp<Transform><<<grid, block>>>(filter_src, dst, warpMat); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
warp<Transform><<<grid, block>>>(filter_src, dst, warpMat); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
@ -263,20 +279,20 @@ namespace cv { namespace cuda { namespace device
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, const float warpMat[Transform::rows*3], cudaStream_t stream, bool cc20)
{
if (stream == 0)
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, warpMat, cc20);
else
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, warpMat, stream, cc20);
}
};
template <class Transform, typename T>
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
int borderMode, const float* borderValue, const float warpMat[Transform::rows*3], cudaStream_t stream, bool cc20)
{
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, const float warpMat[Transform::rows*3], cudaStream_t stream, bool cc20);
static const func_t funcs[3][5] =
{
@ -304,15 +320,13 @@ namespace cv { namespace cuda { namespace device
};
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
static_cast< PtrStepSz<T> >(dst), borderValue, warpMat, stream, cc20);
}
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, coeffs, stream, cc20);
}
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@ -348,9 +362,7 @@ namespace cv { namespace cuda { namespace device
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, coeffs, stream, cc20);
}
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);

@ -41,6 +41,8 @@
//M*/
#include "test_precomp.hpp"
#include "opencv2/core/matx.hpp"
#include "nppdefs.h"
#ifdef HAVE_CUDA
@ -178,5 +180,29 @@ INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine(
WHOLE_SUBMAT));
class RemapOutOfScope : public Remap {};
CUDA_TEST_P(RemapOutOfScope, Regression_18224)
{
cv::Mat src = randomMat(size, type);
cv::cuda::GpuMat dst = createMat(xmap.size(), type, useRoi);
randu(xmap, NPP_MAX_32S, NPP_MAXABS_32F);
randu(ymap, NPP_MAX_32S, NPP_MAXABS_32F);
cv::cuda::remap(loadMat(src, useRoi), dst, loadMat(xmap, useRoi), loadMat(ymap, useRoi), interpolation, borderType, 0.);
cv::Mat dst_gold;
remapGold(src, xmap, ymap, dst_gold, interpolation, borderType, 0.);
EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
testing::Values(BorderType(cv::BORDER_CONSTANT)),
WHOLE_SUBMAT));
}} // namespace
#endif // HAVE_CUDA

@ -1083,9 +1083,9 @@ namespace color_cvt_detail
else
Z = Z * Z * Z;
float B = 0.052891f * X - 0.204043f * Y + 1.151152f * Z;
float G = -0.921235f * X + 1.875991f * Y + 0.045244f * Z;
float R = 3.079933f * X - 1.537150f * Y - 0.542782f * Z;
float B = __saturatef(0.052891f * X - 0.204043f * Y + 1.151152f * Z); // __saturatef clamps values to [0.0, 1.0]
float G = __saturatef(-0.921235f * X + 1.875991f * Y + 0.045244f * Z);
float R = __saturatef(3.079933f * X - 1.537150f * Y - 0.542782f * Z);
if (srgb)
{

@ -1,4 +1,4 @@
if(NOT HAVE_QT5 OR NOT HAVE_CXX11)
if(NOT HAVE_QT OR NOT HAVE_CXX11 OR QT_VERSION_MAJOR LESS 5)
ocv_module_disable(cvv)
return()
endif()

@ -238,17 +238,15 @@ void FreeType2Impl::putTextOutline(
hb_buffer_t *hb_buffer = hb_buffer_create ();
CV_Assert( hb_buffer != NULL );
unsigned int textLen;
hb_buffer_guess_segment_properties (hb_buffer);
hb_buffer_add_utf8 (hb_buffer, _text.c_str(), -1, 0, -1);
FT_Vector currentPos = {0,0};
hb_buffer_guess_segment_properties (hb_buffer);
hb_shape (mHb_font, hb_buffer, NULL, 0);
unsigned int textLen = 0;
hb_glyph_info_t *info =
hb_buffer_get_glyph_infos(hb_buffer,&textLen );
CV_Assert( info != NULL );
hb_shape (mHb_font, hb_buffer, NULL, 0);
PathUserData *userData = new PathUserData( _img );
userData->mColor = _color;
userData->mCtoL = mCtoL;
@ -256,6 +254,7 @@ void FreeType2Impl::putTextOutline(
userData->mLine_type = _line_type;
// Initialize currentPosition (in FreeType coordinates)
FT_Vector currentPos = {0,0};
currentPos.x = _org.x * 64;
currentPos.y = _org.y * 64;
@ -305,15 +304,15 @@ void FreeType2Impl::putTextBitmapMono(
hb_buffer_t *hb_buffer = hb_buffer_create ();
CV_Assert( hb_buffer != NULL );
unsigned int textLen;
hb_buffer_guess_segment_properties (hb_buffer);
hb_buffer_add_utf8 (hb_buffer, _text.c_str(), -1, 0, -1);
hb_buffer_guess_segment_properties (hb_buffer);
hb_shape (mHb_font, hb_buffer, NULL, 0);
unsigned int textLen = 0;
hb_glyph_info_t *info =
hb_buffer_get_glyph_infos(hb_buffer,&textLen );
CV_Assert( info != NULL );
hb_shape (mHb_font, hb_buffer, NULL, 0);
_org.y += _fontHeight;
if( _bottomLeftOrigin == true ){
_org.y -= _fontHeight;
@ -372,6 +371,7 @@ void FreeType2Impl::putTextBitmapBlend(
int _fontHeight, Scalar _color,
int _thickness, int _line_type, bool _bottomLeftOrigin )
{
CV_Assert( _thickness < 0 );
CV_Assert( _line_type == 16 );
@ -379,15 +379,15 @@ void FreeType2Impl::putTextBitmapBlend(
hb_buffer_t *hb_buffer = hb_buffer_create ();
CV_Assert( hb_buffer != NULL );
unsigned int textLen;
hb_buffer_guess_segment_properties (hb_buffer);
hb_buffer_add_utf8 (hb_buffer, _text.c_str(), -1, 0, -1);
hb_buffer_guess_segment_properties (hb_buffer);
hb_shape (mHb_font, hb_buffer, NULL, 0);
unsigned int textLen = 0;
hb_glyph_info_t *info =
hb_buffer_get_glyph_infos(hb_buffer,&textLen );
CV_Assert( info != NULL );
hb_shape (mHb_font, hb_buffer, NULL, 0);
_org.y += _fontHeight;
if( _bottomLeftOrigin == true ){
_org.y -= _fontHeight;
@ -461,13 +461,14 @@ Size FreeType2Impl::getTextSize(
CV_Assert( hb_buffer != NULL );
FT_Vector currentPos = {0,0};
unsigned int textLen;
hb_buffer_guess_segment_properties (hb_buffer);
hb_buffer_add_utf8 (hb_buffer, _text.c_str(), -1, 0, -1);
hb_buffer_guess_segment_properties (hb_buffer);
hb_shape (mHb_font, hb_buffer, NULL, 0);
unsigned int textLen = 0;
hb_glyph_info_t *info =
hb_buffer_get_glyph_infos(hb_buffer,&textLen );
CV_Assert( info != NULL );
hb_shape (mHb_font, hb_buffer, NULL, 0);
// Initialize BoundaryBox (in OpenCV coordinates)
int xMin = INT_MAX, yMin = INT_MAX;
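All four hunks in this file converge on the same call order: hb_buffer_guess_segment_properties() inspects the buffer's contents to guess script, direction and language, so it must run after hb_buffer_add_utf8(), and hb_buffer_get_glyph_infos() only returns shaped glyphs after hb_shape(). A minimal sketch of the corrected sequence (hypothetical helper; error handling omitted):
#include <hb.h>
static unsigned int shapeText(hb_font_t* font, const char* text)
{
    hb_buffer_t* buf = hb_buffer_create();
    hb_buffer_add_utf8(buf, text, -1, 0, -1);    // 1. fill the buffer first
    hb_buffer_guess_segment_properties(buf);     // 2. guess script/direction from the content
    hb_shape(font, buf, NULL, 0);                // 3. shape
    unsigned int glyphCount = 0;                 // 4. glyph infos are valid only now
    hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buf, &glyphCount);
    (void)info;                                  // positions/advances would be consumed here
    hb_buffer_destroy(buf);
    return glyphCount;
}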

@ -1,4 +1,4 @@
if(NOT HAVE_JULIA MATCHES "YES")
if(NOT HAVE_JULIA)
message(STATUS "Julia not found. Not compiling Julia Bindings. ${HAVE_JULIA}")
ocv_module_disable(julia)
elseif(NOT PYTHON_DEFAULT_EXECUTABLE)

@ -168,12 +168,10 @@ static SceneNode& _getSceneNode(SceneManager* sceneMgr, const String& name)
return *mo->getParentSceneNode();
}
/// BGR to RGB 0..1
static ColourValue convertColor(const Scalar& val)
{
// BGR 0..255 (uchar) to RGB 0..1
ColourValue ret = ColourValue(val[2], val[1], val[0]) / 255;
ret.saturate();
return ret;
return ColourValue(val[2], val[1], val[0]).saturateCopy();
}
class WindowSceneImpl;
@ -614,9 +612,8 @@ public:
const Scalar& specularColour) CV_OVERRIDE
{
Light* light = sceneMgr->createLight(name);
// convert to BGR
light->setDiffuseColour(ColourValue(diffuseColour[2], diffuseColour[1], diffuseColour[0]));
light->setSpecularColour(ColourValue(specularColour[2], specularColour[1], specularColour[0]));
light->setDiffuseColour(convertColor(diffuseColour));
light->setSpecularColour(convertColor(specularColour));
Quaternion q;
Vector3 t;

@ -1,3 +1,7 @@
set(the_description "RGBD algorithms")
ocv_define_module(rgbd opencv_core opencv_3d opencv_imgproc OPTIONAL opencv_viz WRAP python)
if(HAVE_OPENGL)
ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}")
endif()

@ -117,6 +117,11 @@ ocv_add_module(sfm
add_definitions(/DGLOG_NO_ABBREVIATED_SEVERITIES) # avoid ERROR macro conflict in glog (ceres dependency)
if(WIN32)
# Avoid error due to min/max being already defined as a macro
add_definitions(-DNOMINMAX)
endif(WIN32)
ocv_warnings_disable(CMAKE_CXX_FLAGS
-Wundef
-Wshadow

@ -17,42 +17,40 @@ if (len(sys.argv) < 2):
pathname = os.path.dirname(sys.argv[0])
img = cv.imread(str(sys.argv[1]))
# for visualization
vis = img.copy()
# Extract channels to be processed individually
channels = cv.text.computeNMChannels(img)
channels = list(cv.text.computeNMChannels(img))
# Append negative channels to detect ER- (bright regions over dark background)
cn = len(channels)-1
for c in range(0,cn):
channels.append((255-channels[c]))
channels.append(255-channels[c])
# Apply the default cascade classifier to each independent channel (could be done in parallel)
erc1 = cv.text.loadClassifierNM1('trained_classifierNM1.xml')
er1 = cv.text.createERFilterNM1(erc1,16,0.00015,0.13,0.2,True,0.1)
erc2 = cv.text.loadClassifierNM2('trained_classifierNM2.xml')
er2 = cv.text.createERFilterNM2(erc2,0.5)
print("Extracting Class Specific Extremal Regions from "+str(len(channels))+" channels ...")
print(" (...) this may take a while (...)")
for channel in channels:
erc1 = cv.text.loadClassifierNM1(pathname+'/trained_classifierNM1.xml')
er1 = cv.text.createERFilterNM1(erc1,16,0.00015,0.13,0.2,True,0.1)
erc2 = cv.text.loadClassifierNM2(pathname+'/trained_classifierNM2.xml')
er2 = cv.text.createERFilterNM2(erc2,0.5)
regions = cv.text.detectRegions(channel,er1,er2)
rects = cv.text.erGrouping(img,channel,[r.tolist() for r in regions])
#rects = cv.text.erGrouping(img,channel,[x.tolist() for x in regions], cv.text.ERGROUPING_ORIENTATION_ANY,'../../GSoC2014/opencv_contrib/modules/text/samples/trained_classifier_erGrouping.xml',0.5)
#Visualization
for r in range(0,np.shape(rects)[0]):
rect = rects[r]
for rect in rects:
cv.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (0, 0, 0), 2)
cv.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
#Visualization
cv.imshow("Text detection result", vis)
cv.waitKey(0)

@ -3,15 +3,15 @@ import cv2 as cv
my_window = cv.viz_Viz3d("Coordinate Frame")
axe = cv.viz_PyWCoordinateSystem()
axis = cv.viz_PyWLine((-1.0,-1.0,-1.0), (1.0,1.0,1.0), cv.viz_PyColor().green())
axe = cv.viz_WCoordinateSystem()
axis = cv.viz_WLine((-1.0,-1.0,-1.0), (1.0,1.0,1.0), cv.viz_Color().green())
axis.setRenderingProperty(cv.viz.LINE_WIDTH, 4.0);
my_window.showWidget("axe",axis)
plan = cv.viz_PyWPlane((-1.0,-1.0,-1.0), (1.0,.0,.0), (-.0,.0,-1.0))
plan = cv.viz_WPlane((-1.0,-1.0,-1.0), (1.0,.0,.0), (-.0,.0,-1.0))
#my_window.showWidget("plan", plan)
cube = cv.viz_PyWCube((0.5,0.5,0.0), (0.0,0.0,-0.5), True, cv.viz_PyColor().blue())
cube = cv.viz_WCube((0.5,0.5,0.0), (0.0,0.0,-0.5), True, cv.viz_Color().blue())
#my_window.showWidget("Cube Widget",cube)
my_window.showWidget("Cube Widget",cube)
pi = np.arccos(-1)
print("First event loop is over")
my_window.spin()
@ -27,7 +27,8 @@ while not my_window.wasStopped():
rot_vec[0, 2] += np.pi * 0.01
translation_phase += pi * 0.01
translation = np.sin(translation_phase)
pose = cv.viz_PyAffine3(rot_vec, (translation, translation, translation))
my_window.setWidgetPosePy("Cube Widget", pose)
my_window.spinOnce(1, True)
pose = cv.viz_Affine3d(rot_vec, (translation, translation, translation))
my_window.setWidgetPose("Cube Widget",pose)
my_window.spinOnce(1, True);
print("Last event loop is over")

@ -2,7 +2,7 @@ import numpy as np
import cv2 as cv
def load_bunny():
with open(cv.samples.findFile("viz/bunny.ply"), 'r') as f:
with open(cv.samples.findFile("../viz/data/bunny.ply"), 'r') as f:
s = f.read()
ligne = s.split('\n')
if len(ligne) == 5753:
@ -13,28 +13,28 @@ def load_bunny():
d = ligne[idx].split(' ')
pts3d[0,idx-12,:] = (float(d[0]), float(d[1]), float(d[2]))
pts3d = 5 * pts3d
return cv.viz_PyWCloud(pts3d)
return cv.viz_WCloud(pts3d)
myWindow = cv.viz_Viz3d("Coordinate Frame")
axe = cv.viz_PyWCoordinateSystem()
axe = cv.viz_WCoordinateSystem()
myWindow.showWidget("axe",axe)
cam_pos = (3.0, 3.0, 3.0)
cam_focal_point = (3.0,3.0,2.0)
cam_y_dir = (-1.0,0.0,0.0)
cam_pose = cv.viz.makeCameraPosePy(cam_pos, cam_focal_point, cam_y_dir)
cam_pose = cv.viz.makeCameraPose(cam_pos, cam_focal_point, cam_y_dir)
print("OK")
transform = cv.viz.makeTransformToGlobalPy((0.0,-1.0,0.0), (-1.0,0.0,0.0), (0.0,0.0,-1.0), cam_pos)
transform = cv.viz.makeTransformToGlobal((0.0,-1.0,0.0), (-1.0,0.0,0.0), (0.0,0.0,-1.0), cam_pos)
pw_bunny = load_bunny()
cloud_pose = cv.viz_PyAffine3()
cloud_pose = cv.viz_Affine3d()
cloud_pose = cloud_pose.translate((0, 0, 3))
cloud_pose_global = transform.product(cloud_pose)
cpw = cv.viz_PyWCameraPosition(0.5)
cpw_frustum = cv.viz_PyWCameraPosition(0.3)
cpw = cv.viz_WCameraPosition(0.5)
cpw_frustum = cv.viz_WCameraPosition(0.3)
myWindow.showWidget("CPW", cpw);
myWindow.showWidget("CPW_FRUSTUM", cpw_frustum)
myWindow.setViewerPosePy(cam_pose)
myWindow.setViewerPose(cam_pose)
myWindow.showWidget("bunny", pw_bunny, cloud_pose_global)
#myWindow.setWidgetPosePy("bunny")
myWindow.spin();

@ -9,4 +9,4 @@ WeChat QR code detector is a high-performance and lightweight QR code detect and
3. More robust finder pattern detection. Besides traditional horizontal line searching, we propose an area-size-based finder pattern detection method: we calculate the area sizes of the black and white blocks to locate the finder pattern from the pre-computed connected cells.
4. Massive engineering optimization. Based on [zing-cpp](https://github.com/glassechidna/zxing-cpp), we conduct massive engineering optimization to boost the decoding success rate, such as trying more binarization methods, supporting N:1:3:1:1 finder pattern detection, finding more alignment pattern, clustering similar size finder pattern, and etc.
4. Massive engineering optimization. Based on [zxing-cpp](https://github.com/glassechidna/zxing-cpp), we conduct massive engineering optimization to boost the decoding success rate, such as trying more binarization methods, supporting N:1:3:1:1 finder pattern detection, finding more alignment patterns, and clustering similar-size finder patterns.

@ -15,12 +15,6 @@ using zxing::BitArray;
using zxing::ErrorHandler;
using zxing::Ref;
#if __WORDSIZE == 64
// typedef long int int64_t;
#else
typedef long long int int64_t;
#endif
BitArray::BitArray(int size_) : size(size_), bits(size_), nextSets(size_), nextUnSets(size_) {}
void BitArray::setUnchar(int i, unsigned char newBits) { bits[i] = newBits; }

@ -380,3 +380,12 @@ pages={617--632},
year={2013},
publisher={Elsevier}
}
@article{loke2021accelerated,
title={Accelerated superpixel image segmentation with a parallelized DBSCAN algorithm},
author={Loke, Seng Cheong and MacDonald, Bruce A and Parsons, Matthew and W{\"u}nsche, Burkhard Claus},
journal={Journal of Real-Time Image Processing},
pages={1--16},
year={2021},
publisher={Springer}
}

@ -43,6 +43,7 @@
#include "ximgproc/structured_edge_detection.hpp"
#include "ximgproc/edgeboxes.hpp"
#include "ximgproc/edge_drawing.hpp"
#include "ximgproc/scansegment.hpp"
#include "ximgproc/seeds.hpp"
#include "ximgproc/segmentation.hpp"
#include "ximgproc/fast_hough_transform.hpp"
@ -60,6 +61,7 @@
#include "ximgproc/run_length_morphology.hpp"
#include "ximgproc/edgepreserving_filter.hpp"
#include "ximgproc/color_match.hpp"
#include "ximgproc/radon_transform.hpp"
/** @defgroup ximgproc Extended Image Processing

@ -67,8 +67,14 @@ public:
CV_WRAP virtual void getEdgeImage(OutputArray dst) = 0;
CV_WRAP virtual void getGradientImage(OutputArray dst) = 0;
/** @brief Returns std::vector<std::vector<Point>> of detected edge segments, see detectEdges()
*/
CV_WRAP virtual std::vector<std::vector<Point> > getSegments() = 0;
/** @brief Returns for each line found in detectLines() its edge segment index in getSegments()
*/
CV_WRAP virtual std::vector<int> getSegmentIndicesOfLines() const = 0;
/** @brief Detects lines.
@param lines output Vec4f containing the start and end points of the detected lines.

@ -0,0 +1,40 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_RADON_TRANSFORM_HPP__
#define __OPENCV_RADON_TRANSFORM_HPP__
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
namespace cv { namespace ximgproc {
/**
* @brief Calculate Radon Transform of an image.
* @param src The source (input) image.
* @param dst The destination image, result of transformation.
* @param theta Angle resolution of the transform in degrees.
* @param start_angle Start angle of the transform in degrees.
* @param end_angle End angle of the transform in degrees.
* @param crop Crop the source image into a circle.
* @param norm Normalize the output Mat to grayscale and convert type to CV_8U
*
* This function calculates the Radon Transform of a given image over any angular range.
* See https://engineering.purdue.edu/~malcolm/pct/CTI_Ch03.pdf for details.
* If the input type is CV_8U, the output will be CV_32S.
* If the input type is CV_32F or CV_64F, the output will be CV_64F.
* The output size will be num_of_integral x src_diagonal_length.
* If crop is selected, the input image will be cropped into a square and then a circle,
* and the output size will be num_of_integral x min_edge.
*
*/
CV_EXPORTS_W void RadonTransform(InputArray src,
OutputArray dst,
double theta = 1,
double start_angle = 0,
double end_angle = 180,
bool crop = false,
bool norm = false);
} }
#endif
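For reference, the projection documented above is the standard continuous Radon transform (a textbook definition, not quoted from this header); the implementation samples it by rotating the cropped or padded image to each angle and summing along rows:

R_f(\theta, s) \;=\; \int_{-\infty}^{\infty}\!\int_{-\infty}^{\infty} f(x, y)\,\delta(x\cos\theta + y\sin\theta - s)\,\mathrm{d}x\,\mathrm{d}y

Each output column corresponds to one angle \theta, sampled from start_angle to end_angle in steps of theta, while s runs along the rows of the rotated image.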

@ -0,0 +1,83 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2021, Dr Seng Cheong Loke (lokesengcheong@gmail.com)
#ifndef __OPENCV_XIMGPROC_SCANSEGMENT_HPP__
#define __OPENCV_XIMGPROC_SCANSEGMENT_HPP__
#include <opencv2/core.hpp>
namespace cv { namespace ximgproc {
/** @brief Class implementing the F-DBSCAN (Accelerated superpixel image segmentation with a parallelized DBSCAN algorithm) superpixel
algorithm by Loke SC, et al.; see @cite loke2021accelerated for the original paper.
The algorithm uses a parallelised DBSCAN cluster search that is resistant to noise, competitive in segmentation quality, and faster than
existing superpixel segmentation methods. When tested on the Berkeley Segmentation Dataset, the average processing speed is 175 frames/s
with a Boundary Recall of 0.797 and an Achievable Segmentation Accuracy of 0.944. The computational complexity is quadratic O(n^2), making it
better suited to smaller images, but it can still process a 2MP colour image faster than the SEEDS algorithm in OpenCV. The output is deterministic
when the number of processing threads is fixed, and the source image must be in Lab colour format.
*/
class CV_EXPORTS_W ScanSegment : public Algorithm
{
public:
virtual ~ScanSegment();
/** @brief Returns the actual number of superpixels computed for the last image processed by iterate().
Returns zero if no image has been processed.
*/
CV_WRAP virtual int getNumberOfSuperpixels() = 0;
/** @brief Calculates the superpixel segmentation on a given image with the initialized
parameters in the ScanSegment object.
This function can be called again for other images without re-initializing the algorithm with createScanSegment().
This saves the computational cost of allocating memory for all the structures of the algorithm.
@param img Input image. Supported format: CV_8UC3. Image size must match with the initialized
image size with the function createScanSegment(). It MUST be in Lab color space.
*/
CV_WRAP virtual void iterate(InputArray img) = 0;
/** @brief Returns the segmentation labeling of the image.
Each label represents a superpixel, and each pixel is assigned to one superpixel label.
@param labels_out Return: A CV_32SC1 integer array containing the labels of the superpixel
segmentation. The labels are in the range [0, getNumberOfSuperpixels()].
*/
CV_WRAP virtual void getLabels(OutputArray labels_out) = 0;
/** @brief Returns the mask of the superpixel segmentation stored in the ScanSegment object.
The function returns the boundaries of the superpixel segmentation.
@param image Return: CV_8UC1 image mask where 255 indicates that the pixel is a superpixel border, and 0 otherwise.
@param thick_line If false, the border is only one pixel wide, otherwise all pixels at the border are masked.
*/
CV_WRAP virtual void getLabelContourMask(OutputArray image, bool thick_line = false) = 0;
};
/** @brief Initializes a ScanSegment object.
The function initializes a ScanSegment object for the input image. It stores the parameters of
the image: image_width and image_height. It also sets the parameters of the F-DBSCAN superpixel
algorithm, which are: num_superpixels, slices, and merge_small.
@param image_width Image width.
@param image_height Image height.
@param num_superpixels Desired number of superpixels. Note that the actual number may be smaller
due to restrictions (depending on the image size). Use getNumberOfSuperpixels() to
get the actual number.
@param slices Number of processing threads for parallelisation. Setting -1 uses the maximum number
of threads. In practice, four threads is enough for smaller images and eight threads for larger ones.
@param merge_small Merge small segments to give the desired number of superpixels. Processing is
much faster without merging, but many small segments will be left in the image.
*/
CV_EXPORTS_W cv::Ptr<ScanSegment> createScanSegment(int image_width, int image_height, int num_superpixels, int slices = 8, bool merge_small = true);
}} // namespace
#endif
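A minimal usage sketch under the constraints documented above (CV_8UC3 input in Lab colour space, size matching the values passed to createScanSegment(); "input.png" is a placeholder path):
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/ximgproc/scansegment.hpp>
int main()
{
    cv::Mat bgr = cv::imread("input.png", cv::IMREAD_COLOR);
    if (bgr.empty()) return 1;
    cv::Mat lab;
    cv::cvtColor(bgr, lab, cv::COLOR_BGR2Lab);              // Lab input is required
    cv::Ptr<cv::ximgproc::ScanSegment> ss =
        cv::ximgproc::createScanSegment(lab.cols, lab.rows, /*num_superpixels=*/500);
    ss->iterate(lab);                                       // reusable for further same-sized images
    cv::Mat labels, mask;
    ss->getLabels(labels);                                  // integer label map
    ss->getLabelContourMask(mask, /*thick_line=*/false);    // CV_8UC1 boundary mask
    return ss->getNumberOfSuperpixels() > 0 ? 0 : 1;
}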

@ -0,0 +1,35 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
namespace opencv_test { namespace {
typedef tuple<Size, MatType> RadonTransformPerfTestParam;
typedef perf::TestBaseWithParam<RadonTransformPerfTestParam> RadonTransformPerfTest;
PERF_TEST_P(RadonTransformPerfTest, perf,
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_32FC1, CV_64FC1)
)
)
{
Size srcSize = get<0>(GetParam());
int srcType = get<1>(GetParam());
Mat src(srcSize, srcType);
Mat radon;
declare.in(src, WARMUP_RNG);
TEST_CYCLE()
{
RadonTransform(src, radon);
}
SANITY_CHECK_NOTHING();
}
} }

@ -0,0 +1,18 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/highgui.hpp>
#include <opencv2/ximgproc/radon_transform.hpp>
using namespace cv;
int main() {
Mat src = imread("peilin_plane.png", IMREAD_GRAYSCALE);
Mat radon;
ximgproc::RadonTransform(src, radon, 1, 0, 180, false, true);
imshow("src image", src);
imshow("Radon transform", radon);
waitKey();
return 0;
}

@ -0,0 +1,13 @@
# This file is part of OpenCV project.
# It is subject to the license terms in the LICENSE file found in the top-level directory
# of this distribution and at http://opencv.org/license.html.
import numpy as np
import cv2 as cv
if __name__ == "__main__":
src = cv.imread("peilin_plane.png", cv.IMREAD_GRAYSCALE)
radon = cv.ximgproc.RadonTransform(src)
cv.imshow("src image", src)
cv.imshow("Radon transform", radon)
cv.waitKey()

@ -109,6 +109,7 @@ public:
void getGradientImage(OutputArray dst) CV_OVERRIDE;
vector<vector<Point> > getSegments() CV_OVERRIDE;
vector<int> getSegmentIndicesOfLines() const CV_OVERRIDE;
void detectLines(OutputArray lines) CV_OVERRIDE;
void detectEllipses(OutputArray ellipses) CV_OVERRIDE;
@ -120,6 +121,7 @@ protected:
int height; // height of source image
uchar *srcImg;
vector<vector<Point> > segmentPoints;
vector<int> segmentIndicesOfLines;
Mat smoothImage;
uchar *edgeImg; // pointer to edge image data
uchar *smoothImg; // pointer to smoothed image data
@ -440,6 +442,11 @@ std::vector<std::vector<Point> > EdgeDrawingImpl::getSegments()
return segmentPoints;
}
std::vector<int> EdgeDrawingImpl::getSegmentIndicesOfLines() const
{
return segmentIndicesOfLines;
}
void EdgeDrawingImpl::ComputeGradient()
{
for (int j = 0; j < width; j++)
@ -1312,12 +1319,15 @@ void EdgeDrawingImpl::detectLines(OutputArray _lines)
for (int i = 1; i <= size - linesNo; i++)
lines.pop_back();
segmentIndicesOfLines.clear();
for (int i = 0; i < linesNo; i++)
{
Vec4f line((float)lines[i].sx, (float)lines[i].sy, (float)lines[i].ex, (float)lines[i].ey);
linePoints.push_back(line);
segmentIndicesOfLines.push_back(lines[i].segmentNo);
}
Mat(linePoints).copyTo(_lines);
delete[] x;
delete[] y;
}
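A hypothetical sketch of how the new accessor ties detectLines() output back to getSegments() ("input.png" is a placeholder path):
#include <vector>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/ximgproc/edge_drawing.hpp>
int main()
{
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    if (img.empty()) return 1;
    cv::Ptr<cv::ximgproc::EdgeDrawing> ed = cv::ximgproc::createEdgeDrawing();
    ed->detectEdges(img);
    std::vector<cv::Vec4f> lines;
    ed->detectLines(lines);
    std::vector<std::vector<cv::Point> > segments = ed->getSegments();
    std::vector<int> segIdx = ed->getSegmentIndicesOfLines();
    // segIdx[i] indexes the entry of segments that produced lines[i],
    // so the two vectors have matching lengths.
    return segIdx.size() == lines.size() ? 0 : 1;
}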

@ -537,7 +537,7 @@ public:
}
~EDArcs() {
delete arcs;
delete[] arcs;
}
};
@ -552,8 +552,8 @@ struct BufferManager {
}
~BufferManager() {
delete x;
delete y;
delete[] x;
delete[] y;
}
double *getX() { return &x[index]; }
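Both hunks fix the same defect: these buffers are allocated with new[], and releasing a new[] allocation with plain delete is undefined behaviour. A one-struct illustration of the rule (hypothetical names):
struct Buffer {
    double* x;
    explicit Buffer(int n) : x(new double[n]) {}
    ~Buffer() { delete[] x; }   // must be delete[], not delete, for new[] allocations
};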

@ -0,0 +1,81 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
namespace cv {namespace ximgproc {
void RadonTransform(InputArray src,
OutputArray dst,
double theta,
double start_angle,
double end_angle,
bool crop,
bool norm)
{
CV_Assert(src.dims() == 2);
CV_Assert(src.channels() == 1);
CV_Assert((end_angle - start_angle) * theta > 0);
Mat _srcMat = src.getMat();
int _row_num, _col_num, _out_mat_type;
_col_num = cvRound((end_angle - start_angle) / theta);
transpose(_srcMat, _srcMat);
Mat _masked_src;
cv::Point _center;
if (_srcMat.type() == CV_32FC1 || _srcMat.type() == CV_64FC1) {
_out_mat_type = CV_64FC1;
}
else {
_out_mat_type = CV_32SC1;
}
if (crop) {
// crop the source into square
_row_num = min(_srcMat.rows, _srcMat.cols);
cv::Rect _crop_ROI(
_srcMat.cols / 2 - _row_num / 2,
_srcMat.rows / 2 - _row_num / 2,
_row_num, _row_num);
_srcMat = _srcMat(_crop_ROI);
// crop the source into circle
Mat _mask(_srcMat.size(), CV_8UC1, Scalar(0));
_center = Point(_srcMat.cols / 2, _srcMat.rows / 2);
circle(_mask, _center, _srcMat.cols / 2, Scalar(255), FILLED);
_srcMat.copyTo(_masked_src, _mask);
}
else {
// avoid cropping corner when rotating
_row_num = cvCeil(sqrt(_srcMat.rows * _srcMat.rows + _srcMat.cols * _srcMat.cols));
_masked_src = Mat(Size(_row_num, _row_num), _srcMat.type(), Scalar(0));
_center = Point(_masked_src.cols / 2, _masked_src.rows / 2);
_srcMat.copyTo(_masked_src(Rect(
(_row_num - _srcMat.cols) / 2,
(_row_num - _srcMat.rows) / 2,
_srcMat.cols, _srcMat.rows)));
}
double _t;
Mat _rotated_src;
Mat _radon(_row_num, _col_num, _out_mat_type);
for (int _col = 0; _col < _col_num; _col++) {
// rotate the source by _t
_t = (start_angle + _col * theta);
cv::Mat _r_matrix = cv::getRotationMatrix2D(_center, _t, 1);
cv::warpAffine(_masked_src, _rotated_src, _r_matrix, _masked_src.size());
Mat _col_mat = _radon.col(_col);
// make projection
cv::reduce(_rotated_src, _col_mat, 1, REDUCE_SUM, _out_mat_type);
}
if (norm) {
normalize(_radon, _radon, 0, 255, NORM_MINMAX, CV_8UC1);
}
_radon.copyTo(dst);
return;
}
} }

@ -0,0 +1,770 @@
////////////////////////////////////////////////////////////////////////////////////////
//
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2021, Dr Seng Cheong Loke (lokesengcheong@gmail.com)
//
//
#include "precomp.hpp"
#include <numeric>
#include <atomic>
namespace cv {
namespace ximgproc {
ScanSegment::~ScanSegment()
{
// nothing
}
class ScanSegmentImpl CV_FINAL : public ScanSegment
{
#define UNKNOWN 0
#define BORDER -1
#define UNCLASSIFIED -2
#define NONE -3
public:
ScanSegmentImpl(int image_width, int image_height, int num_superpixels, int slices, bool merge_small);
virtual ~ScanSegmentImpl();
virtual int getNumberOfSuperpixels() CV_OVERRIDE { return clusterCount; }
virtual void iterate(InputArray img) CV_OVERRIDE;
virtual void getLabels(OutputArray labels_out) CV_OVERRIDE;
virtual void getLabelContourMask(OutputArray image, bool thick_line = false) CV_OVERRIDE;
private:
static const int neighbourCount = 8; // number of pixel neighbours
static const int smallClustersDiv = 10000; // divide total pixels by this to give smallClusters
const float tolerance100 = 10.0f; // colour tolerance for image size of 100x100px
int processthreads; // concurrent threads for parallel processing
int width, height; // image size
int superpixels; // number of superpixels
bool merge; // merge small superpixels
int indexSize; // size of label mat vector
int clusterSize; // max size of clusters
int clusterCount; // number of superpixels from the most recent iterate
float adjTolerance; // adjusted colour tolerance
int horzDiv, vertDiv; // number of horizontal and vertical segments
float horzLength, vertLength; // length of each segment
int effectivethreads; // effective number of concurrent threads
int smallClusters; // clusters below this pixel count are considered small for merging
cv::AutoBuffer<cv::Rect> seedRects; // autobuffer of seed rectangles
cv::AutoBuffer<cv::Rect> seedRectsExt; // autobuffer of extended seed rectangles
cv::AutoBuffer<cv::Rect> offsetRects; // autobuffer of offset rectangles
cv::Point neighbourLoc[8] = { cv::Point(-1, -1), cv::Point(0, -1), cv::Point(1, -1), cv::Point(-1, 0), cv::Point(1, 0), cv::Point(-1, 1), cv::Point(0, 1), cv::Point(1, 1) }; // neighbour locations
std::vector<int> indexNeighbourVec; // indices for parallel processing
std::vector<std::pair<int, int>> indexProcessVec;
cv::AutoBuffer<int> labelsBuffer; // label autobuffer
cv::AutoBuffer<int> clusterBuffer; // cluster autobuffer
cv::AutoBuffer<uchar> pixelBuffer; // pixel autobuffer
std::vector<cv::AutoBuffer<int>> offsetVec; // vector of offset autobuffers
cv::Vec3b* labBuffer; // lab buffer
int neighbourLocBuffer[neighbourCount]; // neighbour locations
std::atomic<int> clusterIndex, clusterID; // atomic indices
cv::Mat src, labelsMat; // mats
struct WSNode
{
int next;
int mask_ofs;
int img_ofs;
};
// Queue for WSNodes
struct WSQueue
{
WSQueue() { first = last = 0; }
int first, last;
};
void OP1(int v);
void OP2(std::pair<int, int> const& p);
void OP3(int v);
void OP4(std::pair<int, int> const& p);
void expandCluster(int* offsetBuffer, const cv::Point& point);
void calculateCluster(int* offsetBuffer, int* offsetEnd, int pointIndex, int currentClusterID);
static int allocWSNodes(std::vector<WSNode>& storage);
static void watershedEx(const cv::Mat& src, cv::Mat& dst);
};
CV_EXPORTS Ptr<ScanSegment> createScanSegment(int image_width, int image_height, int num_superpixels, int slices, bool merge_small)
{
return makePtr<ScanSegmentImpl>(image_width, image_height, num_superpixels, slices, merge_small);
}
ScanSegmentImpl::ScanSegmentImpl(int image_width, int image_height, int num_superpixels, int slices, bool merge_small)
{
// set the number of process threads
processthreads = (slices > 0) ? slices : cv::getNumThreads();
width = image_width;
height = image_height;
superpixels = num_superpixels;
merge = merge_small;
indexSize = height * width;
clusterSize = cvRound(1.1f * (float)(width * height) / (float)superpixels);
clusterCount = 0;
labelsMat = cv::Mat(height, width, CV_32SC1);
// divide bounds area into uniformly distributed rectangular segments
int shortCount = cvFloor(sqrtf((float)processthreads));
int longCount = processthreads / shortCount;
horzDiv = width > height ? longCount : shortCount;
vertDiv = width > height ? shortCount : longCount;
horzLength = (float)width / (float)horzDiv;
vertLength = (float)height / (float)vertDiv;
effectivethreads = horzDiv * vertDiv;
smallClusters = 0;
// get array of seed rects
seedRects = cv::AutoBuffer<cv::Rect>(horzDiv * vertDiv);
seedRectsExt = cv::AutoBuffer<cv::Rect>(horzDiv * vertDiv);
offsetRects = cv::AutoBuffer<cv::Rect>(horzDiv * vertDiv);
for (int y = 0; y < vertDiv; y++) {
for (int x = 0; x < horzDiv; x++) {
int xStart = cvFloor((float)x * horzLength);
int yStart = cvFloor((float)y * vertLength);
cv::Rect seedRect = cv::Rect(xStart, yStart, (int)(x == horzDiv - 1 ? width - xStart : horzLength), (int)(y == vertDiv - 1 ? height - yStart : vertLength));
int bnd_l = seedRect.x;
int bnd_t = seedRect.y;
int bnd_r = seedRect.x + seedRect.width - 1;
int bnd_b = seedRect.y + seedRect.height - 1;
if (bnd_l > 0) {
bnd_l -= 1;
}
if (bnd_t > 0) {
bnd_t -= 1;
}
if (bnd_r < width - 1) {
bnd_r += 1;
}
if (bnd_b < height - 1) {
bnd_b += 1;
}
seedRects.data()[(y * horzDiv) + x] = seedRect;
seedRectsExt.data()[(y * horzDiv) + x] = cv::Rect(bnd_l, bnd_t, bnd_r - bnd_l + 1, bnd_b - bnd_t + 1);
offsetRects.data()[(y * horzDiv) + x] = cv::Rect(seedRect.x - bnd_l, seedRect.y - bnd_t, seedRect.width, seedRect.height);
}
}
// get adjusted tolerance = (100 / average length (horz/vert)) x sqrt(3) [i.e. euclidean Lab colour distance sqrt(l^2 + a^2 + b^2)] x tolerance100
adjTolerance = (200.0f / (width + height)) * sqrtf(3) * tolerance100;
adjTolerance = adjTolerance * adjTolerance;
// create neighbour vector
indexNeighbourVec = std::vector<int>(effectivethreads);
std::iota(indexNeighbourVec.begin(), indexNeighbourVec.end(), 0);
// create process vector
indexProcessVec = std::vector<std::pair<int, int>>(processthreads);
int processDiv = indexSize / processthreads;
int processCurrent = 0;
for (int i = 0; i < processthreads - 1; i++) {
indexProcessVec[i] = std::make_pair(processCurrent, processCurrent + processDiv);
processCurrent += processDiv;
}
indexProcessVec[processthreads - 1] = std::make_pair(processCurrent, indexSize);
// create buffers and initialise
labelsBuffer = cv::AutoBuffer<int>(indexSize);
clusterBuffer = cv::AutoBuffer<int>(indexSize);
pixelBuffer = cv::AutoBuffer<uchar>(indexSize);
offsetVec = std::vector<cv::AutoBuffer<int>>(effectivethreads);
int offsetSize = (clusterSize + 1) * sizeof(int);
for (int i = 0; i < effectivethreads; i++) {
offsetVec[i] = cv::AutoBuffer<int>(offsetSize);
}
for (int i = 0; i < neighbourCount; i++) {
neighbourLocBuffer[i] = (neighbourLoc[i].y * width) + neighbourLoc[i].x;
}
}
ScanSegmentImpl::~ScanSegmentImpl()
{
// clean up
if (!src.empty()) {
src.release();
}
if (!labelsMat.empty()) {
labelsMat.release();
}
}
void ScanSegmentImpl::iterate(InputArray img)
{
if (img.isMat())
{
// get Mat
src = img.getMat();
// image should be valid
CV_Assert(!src.empty());
}
else if (img.isMatVector())
{
std::vector<cv::Mat> vec;
// get vector Mat
img.getMatVector(vec);
// array should be valid
CV_Assert(!vec.empty());
// merge into Mat
cv::merge(vec, src);
}
else
CV_Error(Error::StsInternal, "Invalid InputArray.");
int depth = src.depth();
CV_Assert(src.size().width == width && src.size().height == height);
CV_Assert(depth == CV_8U);
CV_Assert(src.channels() == 3);
clusterCount = 0;
clusterIndex.store(0);
clusterID.store(1);
smallClusters = indexSize / smallClustersDiv;
// set labels to NONE
labelsMat.setTo(NONE);
// set labels buffer to UNCLASSIFIED
std::fill(labelsBuffer.data(), labelsBuffer.data() + indexSize, UNCLASSIFIED);
// apply light blur
cv::medianBlur(src, src, 3);
// start at the center of the rect, then run through the remainder
labBuffer = reinterpret_cast<cv::Vec3b*>(src.data);
cv::parallel_for_(Range(0, (int)indexNeighbourVec.size()), [&](const Range& range) {
for (int i = range.start; i < range.end; i++) {
OP1(i);
}
});
if (merge) {
// get cutoff size for clusters
std::vector<std::pair<int, int>> countVec;
int clusterIndexSize = clusterIndex.load();
countVec.reserve(clusterIndexSize / 2);
for (int i = 1; i < clusterIndexSize; i += 2) {
int count = clusterBuffer.data()[i];
if (count >= smallClusters) {
int currentID = clusterBuffer.data()[i - 1];
countVec.push_back(std::make_pair(currentID, count));
}
}
// sort descending
std::sort(countVec.begin(), countVec.end(), [](const std::pair<int, int>& left, const std::pair<int, int>& right) {
return left.second > right.second;
});
int countSize = (int)countVec.size();
int cutoff = MAX(smallClusters, countVec[MIN(countSize - 1, superpixels - 1)].second);
clusterCount = (int)std::count_if(countVec.begin(), countVec.end(), [&cutoff](std::pair<int, int> p) {return p.second > cutoff; });
// change labels to 1 -> clusterCount, 0 = UNKNOWN, reuse clusterbuffer
std::fill_n(clusterBuffer.data(), indexSize, UNKNOWN);
int countLimit = cutoff == -1 ? (int)countVec.size() : clusterCount;
for (int i = 0; i < countLimit; i++) {
clusterBuffer.data()[countVec[i].first] = i + 1;
}
parallel_for_(Range(0, (int)indexProcessVec.size()), [&](const Range& range) {
for (int i = range.start; i < range.end; i++) {
OP2(indexProcessVec[i]);
}
});
// make copy of labels buffer
memcpy(labelsMat.data, labelsBuffer.data(), indexSize * sizeof(int));
// run watershed
cv::parallel_for_(Range(0, (int)indexNeighbourVec.size()), [&](const Range& range) {
for (int i = range.start; i < range.end; i++) {
OP3(i);
}
});
// copy back to labels mat
parallel_for_(Range(0, (int)indexProcessVec.size()), [&](const Range& range) {
for (int i = range.start; i < range.end; i++) {
OP4(indexProcessVec[i]);
}
});
}
else
{
memcpy(labelsMat.data, labelsBuffer.data(), indexSize * sizeof(int));
}
src.release();
}
void ScanSegmentImpl::OP1(int v)
{
cv::Rect seedRect = seedRects.data()[v];
for (int y = seedRect.y; y < seedRect.y + seedRect.height; y++) {
for (int x = seedRect.x; x < seedRect.x + seedRect.width; x++) {
expandCluster(offsetVec[v].data(), cv::Point(x, y));
}
}
}
void ScanSegmentImpl::OP2(std::pair<int, int> const& p)
{
for (int i = p.first; i < p.second; i++) {
labelsBuffer.data()[i] = clusterBuffer.data()[labelsBuffer.data()[i]];
if (labelsBuffer.data()[i] == UNKNOWN) {
pixelBuffer.data()[i] = 255;
}
else {
pixelBuffer.data()[i] = 0;
}
}
}
void ScanSegmentImpl::OP3(int v)
{
cv::Rect seedRectExt = seedRectsExt.data()[v];
cv::Mat seedLabels = labelsMat(seedRectExt).clone();
watershedEx(src(seedRectExt), seedLabels);
seedLabels(offsetRects.data()[v]).copyTo(labelsMat(seedRects.data()[v]));
seedLabels.release();
}
void ScanSegmentImpl::OP4(std::pair<int, int> const& p)
{
for (int i = p.first; i < p.second; i++) {
if (pixelBuffer.data()[i] == 0) {
((int*)labelsMat.data)[i] = labelsBuffer.data()[i] - 1;
}
else {
((int*)labelsMat.data)[i] -= 1;
}
}
}
// expand clusters from a point
void ScanSegmentImpl::expandCluster(int* offsetBuffer, const cv::Point& point)
{
int pointIndex = (point.y * width) + point.x;
if (labelsBuffer.data()[pointIndex] == UNCLASSIFIED) {
int offsetStart = 0;
int offsetEnd = 0;
int currentClusterID = clusterID.fetch_add(1);
calculateCluster(offsetBuffer, &offsetEnd, pointIndex, currentClusterID);
if (offsetStart == offsetEnd) {
labelsBuffer.data()[pointIndex] = UNKNOWN;
}
else {
// set cluster id and get core point index
labelsBuffer.data()[pointIndex] = currentClusterID;
while (offsetStart < offsetEnd) {
int intoffset2 = *(offsetBuffer + offsetStart);
offsetStart++;
calculateCluster(offsetBuffer, &offsetEnd, intoffset2, currentClusterID);
}
// add origin point
offsetBuffer[offsetEnd] = pointIndex;
offsetEnd++;
// store to buffer
int currentClusterIndex = clusterIndex.fetch_add(2);
clusterBuffer.data()[currentClusterIndex] = currentClusterID;
clusterBuffer.data()[currentClusterIndex + 1] = offsetEnd;
}
}
}
void ScanSegmentImpl::calculateCluster(int* offsetBuffer, int* offsetEnd, int pointIndex, int currentClusterID)
{
for (int i = 0; i < neighbourCount; i++) {
if (*offsetEnd < clusterSize) {
int intoffset2 = pointIndex + neighbourLocBuffer[i];
if (intoffset2 >= 0 && intoffset2 < indexSize && labelsBuffer.data()[intoffset2] == UNCLASSIFIED) {
int diff1 = (int)labBuffer[pointIndex][0] - (int)labBuffer[intoffset2][0];
int diff2 = (int)labBuffer[pointIndex][1] - (int)labBuffer[intoffset2][1];
int diff3 = (int)labBuffer[pointIndex][2] - (int)labBuffer[intoffset2][2];
if ((diff1 * diff1) + (diff2 * diff2) + (diff3 * diff3) <= (int)adjTolerance) {
labelsBuffer.data()[intoffset2] = currentClusterID;
offsetBuffer[*offsetEnd] = intoffset2;
(*offsetEnd)++;
}
}
}
else { break; }
}
}
int ScanSegmentImpl::allocWSNodes(std::vector<ScanSegmentImpl::WSNode>& storage)
{
int sz = (int)storage.size();
int newsz = MAX(128, sz * 3 / 2);
storage.resize(newsz);
if (sz == 0)
{
storage[0].next = 0;
sz = 1;
}
for (int i = sz; i < newsz - 1; i++)
storage[i].next = i + 1;
storage[newsz - 1].next = 0;
return sz;
}
// the modified version of the watershed algorithm from OpenCV
void ScanSegmentImpl::watershedEx(const cv::Mat& src, cv::Mat& dst)
{
// https://github.com/Seaball/watershed_with_mask
// Labels for pixels
const int IN_QUEUE = -2; // Pixel visited
// possible bit values = 2^8
const int NQ = 256;
cv::Size size = src.size();
int channel = 3;
// Vector of every created node
std::vector<WSNode> storage;
int free_node = 0, node;
// Priority queue of queues of nodes
// from high priority (0) to low priority (255)
WSQueue q[NQ];
// Non-empty queue with highest priority
int active_queue;
int i, j;
// Color differences
int db, dg, dr;
int subs_tab[513];
// MAX(a,b) = b + MAX(a-b,0)
#define ws_max(a,b) ((b) + subs_tab[(a)-(b)+NQ])
// MIN(a,b) = a - MAX(a-b,0)
#define ws_min(a,b) ((a) - subs_tab[(a)-(b)+NQ])
// Create a new node with offsets mofs and iofs in queue idx
#define ws_push(idx,mofs,iofs) \
{ \
if (!free_node) \
free_node = allocWSNodes(storage); \
node = free_node; \
free_node = storage[free_node].next; \
storage[node].next = 0; \
storage[node].mask_ofs = mofs; \
storage[node].img_ofs = iofs; \
if (q[idx].last) \
storage[q[idx].last].next = node; \
else \
q[idx].first = node; \
q[idx].last = node; \
}
// Get next node from queue idx
#define ws_pop(idx,mofs,iofs) \
{ \
node = q[idx].first; \
q[idx].first = storage[node].next; \
if (!storage[node].next) \
q[idx].last = 0; \
storage[node].next = free_node; \
free_node = node; \
mofs = storage[node].mask_ofs; \
iofs = storage[node].img_ofs; \
}
// Get highest absolute channel difference in diff
#define c_diff(ptr1,ptr2,diff) \
{ \
db = std::abs((ptr1)[0] - (ptr2)[0]); \
dg = std::abs((ptr1)[1] - (ptr2)[1]); \
dr = std::abs((ptr1)[2] - (ptr2)[2]); \
diff = ws_max(db, dg); \
diff = ws_max(diff, dr); \
CV_Assert(0 <= diff && diff <= 255); \
}
CV_Assert(src.type() == CV_8UC3 && dst.type() == CV_32SC1);
CV_Assert(src.size() == dst.size());
// Current pixel in input image
const uchar* img = src.ptr();
// Step size to next row in input image
int istep = int(src.step / sizeof(img[0]));
// Current pixel in mask image
int* mask = dst.ptr<int>();
// Step size to next row in mask image
int mstep = int(dst.step / sizeof(mask[0]));
for (i = 0; i < 256; i++)
subs_tab[i] = 0;
for (i = 256; i <= 512; i++)
subs_tab[i] = i - 256;
//for (j = 0; j < size.width; j++)
//mask[j] = mask[j + mstep*(size.height - 1)] = 0;
// initial phase: put all the neighbor pixels of each marker to the ordered queue -
// determine the initial boundaries of the basins
for (i = 1; i < size.height - 1; i++) {
img += istep; mask += mstep;
mask[0] = mask[size.width - 1] = 0; // boundary pixels
for (j = 1; j < size.width - 1; j++) {
int* m = mask + j;
if (m[0] < 0)
m[0] = 0;
if (m[0] == 0 && (m[-1] > 0 || m[1] > 0 || m[-mstep] > 0 || m[mstep] > 0))
{
// Find smallest difference to adjacent markers
const uchar* ptr = img + j * channel;
int idx = 256, t;
if (m[-1] > 0) {
c_diff(ptr, ptr - channel, idx);
}
if (m[1] > 0) {
c_diff(ptr, ptr + channel, t);
idx = ws_min(idx, t);
}
if (m[-mstep] > 0) {
c_diff(ptr, ptr - istep, t);
idx = ws_min(idx, t);
}
if (m[mstep] > 0) {
c_diff(ptr, ptr + istep, t);
idx = ws_min(idx, t);
}
// Add to according queue
CV_Assert(0 <= idx && idx <= 255);
ws_push(idx, i * mstep + j, i * istep + j * channel);
m[0] = IN_QUEUE;//initial unvisited
}
}
}
// find the first non-empty queue
for (i = 0; i < NQ; i++)
if (q[i].first)
break;
// if there are no markers, exit immediately
if (i == NQ)
return;
active_queue = i;//first non-empty priority queue
img = src.ptr();
mask = dst.ptr<int>();
// recursively fill the basins
int diff = 0, temp = 0;
for (;;)
{
int mofs, iofs;
int lab = 0, t;
int* m;
const uchar* ptr;
// Get non-empty queue with highest priority
// Exit condition: empty priority queue
if (q[active_queue].first == 0)
{
for (i = active_queue + 1; i < NQ; i++)
if (q[i].first)
break;
if (i == NQ)
{
std::vector<WSNode>().swap(storage);
break;
}
active_queue = i;
}
// Get next node
ws_pop(active_queue, mofs, iofs);
int top = 1, bottom = 1, left = 1, right = 1;
if (0 <= mofs && mofs < mstep)//pixel on the top
top = 0;
if ((mofs % mstep) == 0)//pixel in the left column
left = 0;
if ((mofs + 1) % mstep == 0)//pixel in the right column
right = 0;
if (mstep * (size.height - 1) <= mofs && mofs < mstep * size.height)//pixel on the bottom
bottom = 0;
// Calculate pointer to current pixel in input and marker image
m = mask + mofs;
ptr = img + iofs;
// Check surrounding pixels for labels to determine label for current pixel
if (left) {//the left point can be visited
t = m[-1];
if (t > 0) {
lab = t;
c_diff(ptr, ptr - channel, diff);
}
}
if (right) {// Right point can be visited
t = m[1];
if (t > 0) {
if (lab == 0) {// and this point was not labeled before
lab = t;
c_diff(ptr, ptr + channel, diff);
}
else if (t != lab) {
c_diff(ptr, ptr + channel, temp);
diff = ws_min(diff, temp);
if (diff == temp)
lab = t;
}
}
}
if (top) {
t = m[-mstep]; // Top
if (t > 0) {
if (lab == 0) {// and this point was not labeled before
lab = t;
c_diff(ptr, ptr - istep, diff);
}
else if (t != lab) {
c_diff(ptr, ptr - istep, temp);
diff = ws_min(diff, temp);
if (diff == temp)
lab = t;
}
}
}
if (bottom) {
t = m[mstep]; // Bottom
if (t > 0) {
if (lab == 0) {
lab = t;
}
else if (t != lab) {
c_diff(ptr, ptr + istep, temp);
diff = ws_min(diff, temp);
if (diff == temp)
lab = t;
}
}
}
// Set label to current pixel in marker image
CV_Assert(lab != 0);//lab must be labeled with a nonzero number
m[0] = lab;
// Add adjacent, unlabeled pixels to corresponding queue
if (left) {
if (m[-1] == 0)//left pixel with marker 0
{
c_diff(ptr, ptr - channel, t);
ws_push(t, mofs - 1, iofs - channel);
active_queue = ws_min(active_queue, t);
m[-1] = IN_QUEUE;
}
}
if (right)
{
if (m[1] == 0)//right pixel with marker 0
{
c_diff(ptr, ptr + channel, t);
ws_push(t, mofs + 1, iofs + channel);
active_queue = ws_min(active_queue, t);
m[1] = IN_QUEUE;
}
}
if (top)
{
if (m[-mstep] == 0)//top pixel with marker 0
{
c_diff(ptr, ptr - istep, t);
ws_push(t, mofs - mstep, iofs - istep);
active_queue = ws_min(active_queue, t);
m[-mstep] = IN_QUEUE;
}
}
if (bottom) {
if (m[mstep] == 0)//down pixel with marker 0
{
c_diff(ptr, ptr + istep, t);
ws_push(t, mofs + mstep, iofs + istep);
active_queue = ws_min(active_queue, t);
m[mstep] = IN_QUEUE;
}
}
}
}
void ScanSegmentImpl::getLabels(OutputArray labels_out)
{
labels_out.assign(labelsMat);
}
void ScanSegmentImpl::getLabelContourMask(OutputArray image, bool thick_line)
{
image.create(height, width, CV_8UC1);
cv::Mat dst = image.getMat();
dst.setTo(cv::Scalar(0));
const int dx8[8] = { -1, -1, 0, 1, 1, 1, 0, -1 };
const int dy8[8] = { 0, -1, -1, -1, 0, 1, 1, 1 };
for (int j = 0; j < height; j++)
{
for (int k = 0; k < width; k++)
{
int neighbors = 0;
for (int i = 0; i < 8; i++)
{
int x = k + dx8[i];
int y = j + dy8[i];
if ((x >= 0 && x < width) && (y >= 0 && y < height))
{
int index = y * width + x;
int mainindex = j * width + k;
if (((int*)labelsMat.data)[mainindex] != ((int*)labelsMat.data)[index])
{
if (thick_line || !*dst.ptr<uchar>(y, x))
neighbors++;
}
}
}
if (neighbors > 1)
*dst.ptr<uchar>(j, k) = (uchar)255;
}
}
}
} // namespace ximgproc
} // namespace cv

@ -0,0 +1,81 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test {namespace {
TEST(RadonTransformTest, output_size)
{
Mat src(Size(256, 256), CV_8U, Scalar(0));
circle(src, Point(128, 128), 64, Scalar(255), FILLED);
Mat radon;
cv::ximgproc::RadonTransform(src, radon);
EXPECT_EQ(363, radon.rows);
EXPECT_EQ(180, radon.cols);
cv::ximgproc::RadonTransform(src, radon, 1, 0, 180, true);
EXPECT_EQ(256, radon.rows);
EXPECT_EQ(180, radon.cols);
}
TEST(RadonTransformTest, output_type)
{
Mat src_int(Size(256, 256), CV_8U, Scalar(0));
circle(src_int, Point(128, 128), 64, Scalar(255), FILLED);
Mat radon, radon_norm;
cv::ximgproc::RadonTransform(src_int, radon);
cv::ximgproc::RadonTransform(src_int, radon_norm, 1, 0, 180, false, true);
EXPECT_EQ(CV_32SC1, radon.type());
EXPECT_EQ(CV_8U, radon_norm.type());
Mat src_float(Size(256, 256), CV_32FC1, Scalar(0));
Mat src_double(Size(256, 256), CV_64FC1, Scalar(0));
cv::ximgproc::RadonTransform(src_float, radon);
cv::ximgproc::RadonTransform(src_float, radon_norm, 1, 0, 180, false, true);
EXPECT_EQ(CV_64FC1, radon.type());
EXPECT_EQ(CV_8U, radon_norm.type());
cv::ximgproc::RadonTransform(src_double, radon);
EXPECT_EQ(CV_64FC1, radon.type());
EXPECT_EQ(CV_8U, radon_norm.type());
}
TEST(RadonTransformTest, accuracy_by_pixel)
{
Mat src(Size(256, 256), CV_8U, Scalar(0));
circle(src, Point(128, 128), 64, Scalar(255), FILLED);
Mat radon;
cv::ximgproc::RadonTransform(src, radon);
ASSERT_EQ(CV_32SC1, radon.type());
EXPECT_EQ(0, radon.at<int>(0, 0));
EXPECT_LT(18000, radon.at<int>(128, 128));
EXPECT_GT(19000, radon.at<int>(128, 128));
}
TEST(RadonTransformTest, accuracy_uchar)
{
Mat src(Size(10, 10), CV_8UC1, Scalar(1));
cv::Mat radon;
ximgproc::RadonTransform(src, radon, 45, 0, 180, false, false);
EXPECT_EQ(100, sum(radon.col(0))[0]);
}
TEST(RadonTransformTest, accuracy_float)
{
Mat src(Size(10, 10), CV_32FC1, Scalar(1.1));
cv::Mat radon;
ximgproc::RadonTransform(src, radon, 45, 0, 180, false, false);
EXPECT_LT(109, sum(radon.col(0))[0]);
EXPECT_GT(111, sum(radon.col(0))[0]);
}
} }

@ -0,0 +1,35 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
static void runScanSegment(int slices)
{
Mat img = imread(cvtest::findDataFile("cv/shared/lena.png"), IMREAD_COLOR);
Mat labImg;
cvtColor(img, labImg, COLOR_BGR2Lab);
Ptr<ScanSegment> ss = createScanSegment(labImg.cols, labImg.rows, 500, slices, true);
ss->iterate(labImg);
int numSuperpixels = ss->getNumberOfSuperpixels();
EXPECT_GT(numSuperpixels, 100);
EXPECT_LE(numSuperpixels, 500);
Mat res;
ss->getLabelContourMask(res, false);
EXPECT_GE(cvtest::norm(res, NORM_L1), 1000000);
if (cvtest::debugLevel >= 10)
{
imshow("ScanSegment", res);
waitKey();
}
}
TEST(ximgproc_ScanSegment, smoke) { runScanSegment(1); }
TEST(ximgproc_ScanSegment, smoke4) { runScanSegment(4); }
TEST(ximgproc_ScanSegment, smoke8) { runScanSegment(8); }
}} // namespace

@ -93,7 +93,7 @@ void CvLBPEvaluator::generateFeatures()
CvLBPEvaluator::Feature::Feature()
{
rect = cvRect(0, 0, 0, 0);
rect = Rect(0, 0, 0, 0);
}
CvLBPEvaluator::Feature::Feature( int offset, int x, int y, int _blockWidth, int _blockHeight )
@ -108,7 +108,7 @@ CvLBPEvaluator::Feature::Feature( int offset, int x, int y, int _blockWidth, int
void CvLBPEvaluator::Feature::calcPoints(int offset)
{
Rect tr = rect = cvRect(x_, y_, block_w_, block_h_);
Rect tr = rect = Rect(x_, y_, block_w_, block_h_);
CV_SUM_OFFSETS( p[0], p[1], p[4], p[5], tr, offset )
tr.x += 2*rect.width;
CV_SUM_OFFSETS( p[2], p[3], p[6], p[7], tr, offset )

@ -46,14 +46,9 @@ the use of this software, even if advised of the possibility of such damage.
#define __OPENCV_XOBJDETECT_PRECOMP_HPP__
#include <opencv2/xobjdetect.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgproc/types_c.h>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/objdetect.hpp>

@ -73,14 +73,14 @@ static void compute_min_step(const Mat &data_pos, const Mat &data_neg, size_t n_
Mat reduced_pos, reduced_neg;
reduce(data_pos, reduced_pos, 1, CV_REDUCE_MIN);
reduce(data_neg, reduced_neg, 1, CV_REDUCE_MIN);
reduce(data_pos, reduced_pos, 1, REDUCE_MIN);
reduce(data_neg, reduced_neg, 1, REDUCE_MIN);
min(reduced_pos, reduced_neg, data_min);
data_min -= 0.01;
Mat data_max;
reduce(data_pos, reduced_pos, 1, CV_REDUCE_MAX);
reduce(data_neg, reduced_neg, 1, CV_REDUCE_MAX);
reduce(data_pos, reduced_pos, 1, REDUCE_MAX);
reduce(data_neg, reduced_neg, 1, REDUCE_MAX);
max(reduced_pos, reduced_neg, data_max);
data_max += 0.01;

@ -108,8 +108,8 @@ void WBDetectorImpl::train(
vector<Mat> pos_imgs = read_imgs(pos_samples_path);
vector<Mat> neg_imgs = sample_patches(neg_imgs_path, 24, 24, pos_imgs.size() * 10);
assert(pos_imgs.size());
assert(neg_imgs.size());
CV_Assert(pos_imgs.size());
CV_Assert(neg_imgs.size());
int n_features;
Mat pos_data, neg_data;
@ -173,7 +173,7 @@ void WBDetectorImpl::train(
if (confidences.rows > 0) {
Mat1i indices;
sortIdx(confidences, indices,
CV_SORT_EVERY_COLUMN + CV_SORT_DESCENDING);
SORT_EVERY_COLUMN + SORT_DESCENDING);
int win_count = min(max_per_image, confidences.rows);
win_count = min(win_count, stage_neg - bootstrap_count);
@ -209,7 +209,7 @@ void WBDetectorImpl::detect(
Ptr<CvFeatureEvaluator> eval = CvFeatureEvaluator::create();
eval->init(params, 1, Size(24, 24));
boost_.detect(eval, img, scales, bboxes, confidences);
assert(confidences.size() == bboxes.size());
CV_Assert(confidences.size() == bboxes.size());
}
Ptr<WBDetector>
