From 392991fa0b82c7d76444522ed9d7c8afb5493123 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Sun, 13 Dec 2020 19:03:11 +0000
Subject: [PATCH 01/10] core(opencl): add version check before
 clCreateFromGLTexture() call

---
 modules/core/include/opencv2/core/ocl.hpp |  5 +++
 modules/core/src/ocl.cpp                  | 39 +++++++++++++++++------
 modules/core/src/opengl.cpp               | 33 +++++++++++++++++++
 3 files changed, 68 insertions(+), 9 deletions(-)
diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
index 95f0fcdd66..ca46cdc38a 100644
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -720,7 +720,12 @@ public:
 
     String name() const;
     String vendor() const;
+
+    /// See CL_PLATFORM_VERSION
     String version() const;
+    int versionMajor() const;
+    int versionMinor() const;
+
     int deviceNumber() const;
     void getDevice(Device& device, int d) const;
 
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 24b18dc7f8..8e5612f422 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -1162,25 +1162,27 @@ Platform& Platform::getDefault()
 
 /////////////////////////////////////// Device ////////////////////////////////////////////
 
-// deviceVersion has format
+// Version has format:
 //   OpenCL<space><major_version.minor_version><space><vendor-specific information>
 // by specification
 //   http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
 //   http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
-static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
+//   https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
+//   https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
+static void parseOpenCLVersion(const String &version, int &major, int &minor)
 {
     major = minor = 0;
-    if (10 >= deviceVersion.length())
+    if (10 >= version.length())
         return;
-    const char *pstr = deviceVersion.c_str();
+    const char *pstr = version.c_str();
     if (0 != strncmp(pstr, "OpenCL ", 7))
         return;
-    size_t ppos = deviceVersion.find('.', 7);
+    size_t ppos = version.find('.', 7);
     if (String::npos == ppos)
         return;
-    String temp = deviceVersion.substr(7, ppos - 7);
+    String temp = version.substr(7, ppos - 7);
     major = atoi(temp.c_str());
-    temp = deviceVersion.substr(ppos + 1);
+    temp = version.substr(ppos + 1);
     minor = atoi(temp.c_str());
 }
 
@@ -1203,7 +1205,7 @@ struct Device::Impl
         addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
 
         String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
-        parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
+        parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
 
         size_t pos = 0;
         while (pos < extensions_.size())
@@ -5958,6 +5960,9 @@ struct PlatformInfo::Impl
         refcount = 1;
         handle = *(cl_platform_id*)id;
         getDevices(devices, handle);
+
+        version_ = getStrProp(CL_PLATFORM_VERSION);
+        parseOpenCLVersion(version_, versionMajor_, versionMinor_);
     }
 
     String getStrProp(cl_platform_info prop) const
@@ -5971,6 +5976,10 @@ struct PlatformInfo::Impl
     IMPLEMENT_REFCOUNTABLE();
     std::vector<cl_device_id> devices;
     cl_platform_id handle;
+
+    String version_;
+    int versionMajor_;
+    int versionMinor_;
 };
 
 PlatformInfo::PlatformInfo()
@@ -6033,7 +6042,19 @@ String PlatformInfo::vendor() const
 
 String PlatformInfo::version() const
 {
-    return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
+    return p ? p->version_ : String();
+}
+
+int PlatformInfo::versionMajor() const
+{
+    CV_Assert(p);
+    return p->versionMajor_;
+}
+
+int PlatformInfo::versionMinor() const
+{
+    CV_Assert(p);
+    return p->versionMinor_;
 }
 
 static void getPlatforms(std::vector<cl_platform_id>& platforms)
diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp
index dd6f23862b..02f28f5f1a 100644
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@@ -1575,6 +1575,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
 // CL-GL Interoperability
 
 #ifdef HAVE_OPENCL
+#  include "opencv2/core/opencl/runtime/opencl_core.hpp"
 #  include "opencv2/core/opencl/runtime/opencl_gl.hpp"
 #  ifdef cl_khr_gl_sharing
 #    define HAVE_OPENCL_OPENGL_SHARING
@@ -1595,6 +1596,34 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
 
 namespace cv { namespace ogl {
 
+#if defined(HAVE_OPENCL) && defined(HAVE_OPENGL) && defined(HAVE_OPENCL_OPENGL_SHARING)
+// Check to avoid crash in OpenCL runtime: https://github.com/opencv/opencv/issues/5209
+static void checkOpenCLVersion()
+{
+    using namespace cv::ocl;
+    const Device& device = Device::getDefault();
+    //CV_Assert(!device.empty());
+    cl_device_id dev = (cl_device_id)device.ptr();
+    CV_Assert(dev);
+
+    cl_platform_id platform_id = 0;
+    size_t sz = 0;
+
+    cl_int status = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, &sz);
+    CV_Assert(status == CL_SUCCESS && sz == sizeof(cl_platform_id));
+    CV_Assert(platform_id);
+
+    PlatformInfo pi(&platform_id);
+    int versionMajor = pi.versionMajor();
+    int versionMinor = pi.versionMinor();
+    if (versionMajor < 1 || (versionMajor == 1 && versionMinor <= 1))
+        CV_Error_(cv::Error::OpenCLApiCallError,
+            ("OpenCL: clCreateFromGLTexture requires OpenCL 1.2+ version: %d.%d - %s (%s)",
+                versionMajor, versionMinor, pi.name().c_str(), pi.version().c_str())
+        );
+}
+#endif
+
 namespace ocl {
 
 Context& initializeContextFromGL()
@@ -1714,6 +1743,8 @@ void convertToGLTexture2D(InputArray src, Texture2D& texture)
     Context& ctx = Context::getDefault();
     cl_context context = (cl_context)ctx.ptr();
 
+    checkOpenCLVersion();  // clCreateFromGLTexture requires OpenCL 1.2
+
     UMat u = src.getUMat();
 
     // TODO Add support for roi
@@ -1772,6 +1803,8 @@ void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst)
     Context& ctx = Context::getDefault();
     cl_context context = (cl_context)ctx.ptr();
 
+    checkOpenCLVersion();  // clCreateFromGLTexture requires OpenCL 1.2
+
     // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
     dst.create(texture.size(), textureType);
     UMat u = dst.getUMat();

From b01ae2de055f90a2c6b9ecdb3cb2d0adf1ff29b7 Mon Sep 17 00:00:00 2001
From: Pavel Grunt <pavel.grunt@innovatrics.com>
Date: Mon, 14 Dec 2020 12:02:25 +0100
Subject: [PATCH 02/10] Fix a typo s/VERISON/VERSION/

---
 doc/js_tutorials/js_setup/js_usage/js_usage.markdown | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown
index 5f9f338f2d..80f7f7c21e 100644
--- a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown
+++ b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown
@@ -4,7 +4,7 @@ Using OpenCV.js {#tutorial_js_usage}
 Steps
 -----
 
-In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentations at "https://docs.opencv.org/{VERISON_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build Opencv.js.
+In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentations at "https://docs.opencv.org/{VERSION_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build Opencv.js.
 
 ### Create a web page
 

From 48d9031efbbb91f7f8eb82bf4747e55ef96c5fdb Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Mon, 14 Dec 2020 18:29:52 +0000
Subject: [PATCH 03/10] videoio(test): add FFmpeg backend check

- configure through OPENCV_TEST_VIDEOIO_BACKEND_REQUIRE_FFMPEG environment variable
---
 modules/videoio/test/test_main.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/modules/videoio/test/test_main.cpp b/modules/videoio/test/test_main.cpp
index 4eb2abd8f8..889c7987cd 100644
--- a/modules/videoio/test/test_main.cpp
+++ b/modules/videoio/test/test_main.cpp
@@ -2,5 +2,20 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 #include "test_precomp.hpp"
+#include <opencv2/core/utils/logger.hpp>
 
-CV_TEST_MAIN("highgui")
+static
+void initTests()
+{
+#ifndef WINRT  // missing getenv
+    const std::vector<cv::VideoCaptureAPIs> backends = cv::videoio_registry::getStreamBackends();
+    const char* requireFFmpeg = getenv("OPENCV_TEST_VIDEOIO_BACKEND_REQUIRE_FFMPEG");
+    if (requireFFmpeg && !isBackendAvailable(cv::CAP_FFMPEG, backends))
+    {
+        CV_LOG_FATAL(NULL, "OpenCV-Test: required FFmpeg backend is not available (broken plugin?). STOP.");
+        exit(1);
+    }
+#endif
+}
+
+CV_TEST_MAIN("highgui", initTests())

From 4b3d2c88349ae6dff5741c312927fcd0243f612d Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Tue, 15 Dec 2020 00:41:35 +0000
Subject: [PATCH 04/10] dnn(ocl): fix gemm kernels with beta=0

- dst is not initialized, may include NaN values
- 0*NaN produces NaN
---
 modules/core/src/ocl.cpp                      | 11 ++-
 .../dnn/src/ocl4dnn/src/math_functions.cpp    | 78 +++++++++++--------
 modules/dnn/src/opencl/gemm_buffer.cl         | 76 ++++++++++++++----
 modules/dnn/test/test_onnx_importer.cpp       |  3 -
 4 files changed, 117 insertions(+), 51 deletions(-)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 24b18dc7f8..781dad7aea 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -2949,6 +2949,15 @@ bool Kernel::empty() const
     return ptr() == 0;
 }
 
+static cv::String dumpValue(size_t sz, const void* p)
+{
+    if (sz == 4)
+        return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
+    if (sz == 8)
+        return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
+    return cv::format("%p", p);
+}
+
 int Kernel::set(int i, const void* value, size_t sz)
 {
     if (!p || !p->handle)
@@ -2959,7 +2968,7 @@ int Kernel::set(int i, const void* value, size_t sz)
         p->cleanupUMats();
 
     cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
-    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str());
+    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
     if (retval != CL_SUCCESS)
         return -1;
     return i+1;
diff --git a/modules/dnn/src/ocl4dnn/src/math_functions.cpp b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
index 47224c3be6..e26a3c3f06 100644
--- a/modules/dnn/src/ocl4dnn/src/math_functions.cpp
+++ b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
@@ -88,13 +88,13 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
             size_t global_copy[2];
             global_copy[0] = width;
             global_copy[1] = height;
-            oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
-            oclk_gemm_copy.set(1, image);
-            oclk_gemm_copy.set(2, offset);
-            oclk_gemm_copy.set(3, width);
-            oclk_gemm_copy.set(4, height);
-            oclk_gemm_copy.set(5, ld);
-            oclk_gemm_copy.run(2, global_copy, NULL, false);
+            oclk_gemm_copy
+                .args(
+                    ocl::KernelArg::PtrReadOnly(buffer),
+                    image, offset,
+                    width, height,
+                    ld)
+                .run(2, global_copy, NULL, false);
         }
     } else {
         if (!padding)
@@ -112,13 +112,13 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
             global_copy[0] = padded_width;
             global_copy[1] = padded_height;
 
-            oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
-            oclk_gemm_copy.set(1, image);
-            oclk_gemm_copy.set(2, offset);
-            oclk_gemm_copy.set(3, width);
-            oclk_gemm_copy.set(4, height);
-            oclk_gemm_copy.set(5, ld);
-
+            oclk_gemm_copy
+                .args(
+                    ocl::KernelArg::PtrReadOnly(buffer),
+                    image, offset,
+                    width, height,
+                    ld)
+                .run(2, global_copy, NULL, false);
             oclk_gemm_copy.run(2, global_copy, NULL, false);
         }
     }
@@ -465,8 +465,12 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
         kernel_name += "_float";
     }
 
+    bool isBetaZero = beta == 0;
+
     String opts = format("-DTYPE=%d", halfPrecisionMode ? TYPE_HALF : TYPE_FLOAT);
-    ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts);
+    if (isBetaZero)
+        opts += " -DZERO_BETA=1";
+
     size_t local[2] = {};
     size_t global[2] = {};
     if (TransA == CblasNoTrans && TransB != CblasNoTrans && is_small_batch) {
@@ -496,27 +500,37 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
         local[1] = ly;
     }
 
-    int arg_idx = 0;
-    oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
-    oclk_gemm_float.set(arg_idx++, offA);
-    oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(B));
-    oclk_gemm_float.set(arg_idx++, offB);
-    oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrWriteOnly(C));
-    oclk_gemm_float.set(arg_idx++, offC);
-    oclk_gemm_float.set(arg_idx++, M);
-    oclk_gemm_float.set(arg_idx++, N);
-    oclk_gemm_float.set(arg_idx++, K);
-    oclk_gemm_float.set(arg_idx++, (float)alpha);
-    oclk_gemm_float.set(arg_idx++, (float)beta);
-
     bool ret = true;
-    if (TransB == CblasNoTrans || TransA != CblasNoTrans) {
+    if (TransB == CblasNoTrans || TransA != CblasNoTrans)
+    {
+        // _NN_
         int stride = 256;
         for (int start_index = 0; start_index < K; start_index += stride) {
-            oclk_gemm_float.set(arg_idx, start_index);
-            ret = oclk_gemm_float.run(2, global, local, false);
+            ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts);
+            oclk_gemm_float.args(
+                ocl::KernelArg::PtrReadOnly(A), offA,
+                ocl::KernelArg::PtrReadOnly(B), offB,
+                isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC,
+                M, N, K,
+                (float)alpha, (float)beta,
+                start_index
+            );
+            ret &= oclk_gemm_float.run(2, global, local, false);
         }
-    } else {
+    }
+    else
+    {
+        // _NT_
+        //C.reshape(1,1).setTo(0xfe00 /*FP16 NAN*/);  // stable one-line reproducer for https://github.com/opencv/opencv/issues/18937
+        //C.reshape(1,1).setTo(0);  // non-optimal fixup (and not accurate)
+        ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts);
+        oclk_gemm_float.args(
+            ocl::KernelArg::PtrReadOnly(A), offA,
+            ocl::KernelArg::PtrReadOnly(B), offB,
+            isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC,
+            M, N, K,
+            (float)alpha, (float)beta
+        );
         ret = oclk_gemm_float.run(2, global, local, false);
     }
     return ret;
diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl
index 8cbc34dde5..b345983aee 100644
--- a/modules/dnn/src/opencl/gemm_buffer.cl
+++ b/modules/dnn/src/opencl/gemm_buffer.cl
@@ -90,6 +90,12 @@
 #pragma OPENCL EXTENSION  cl_intel_subgroups : enable
 #endif
 
+#ifdef ZERO_BETA
+#define BETA_ZERO_CHECK(b0, v)  (b0)
+#else
+#define BETA_ZERO_CHECK(b0, v)  (v)
+#endif
+
 #define VEC_SIZE        4
 #define LWG_HEIGHT      4
 #define TILE_M          8
@@ -143,14 +149,14 @@ __kernel void TEMPLATE(gemm_buffer_NN, Dtype)(
     int row6 = mad24(global_y, TILE_M, 6) < M ? 6 : border;
     int row7 = mad24(global_y, TILE_M, 7) < M ? 7 : border;
 
-    Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : beta * vload4(0, dst_write0);
-    Dtype4 dot01 = (start_index != 0) ? vload4(0, dst_write0 + 1 * N) : beta * vload4(0, dst_write0 + 1 * N);
-    Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : beta * vload4(0, dst_write0 + 2 * N);
-    Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : beta * vload4(0, dst_write0 + 3 * N);
-    Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : beta * vload4(0, dst_write0 + 4 * N);
-    Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : beta * vload4(0, dst_write0 + 5 * N);
-    Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : beta * vload4(0, dst_write0 + 6 * N);
-    Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : beta * vload4(0, dst_write0 + 7 * N);
+    Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0));
+    Dtype4 dot01 = (start_index != 0) ? vload4(0, dst_write0 + 1 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 1 * N));
+    Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 2 * N));
+    Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 3 * N));
+    Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 4 * N));
+    Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 5 * N));
+    Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 6 * N));
+    Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 7 * N));
 
     int end_index = min(start_index + 256, K);
     int w = start_index;
@@ -579,7 +585,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)(
     output = (local_x == 5) ? _dot.s5 : output; \
     output = (local_x == 6) ? _dot.s6 : output; \
     output = (local_x == 7) ? _dot.s7 : output; \
-    dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \
+    dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \
     dst_write0 += N;
 
     if(global_x < N && global_y * 8 < M) {
@@ -765,7 +771,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)(
     output = (local_x == 5) ? _dot.s5 : output; \
     output = (local_x == 6) ? _dot.s6 : output; \
     output = (local_x == 7) ? _dot.s7 : output; \
-    dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \
+    dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \
     dst_write0 += N;
 
     if(global_x < N && global_y * 8 < M) {
@@ -819,8 +825,9 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)(
     const Dtype4 b1 = {srca_read1[i*4], srca_read1[(i*4+1)], srca_read1[(i*4+2)], srca_read1[(i*4+3)]};
 #pragma unroll
     for(int j = 0; j < rows; ++j) {
-      dot0[j] += b0 * vload4(i, srcb_read + j * K);
-      dot1[j] += b1 * vload4(i, srcb_read + j * K);
+      Dtype4 a = vload4(i, srcb_read + j * K);
+      dot0[j] += b0 * a;
+      dot1[j] += b1 * a;
     }
 
     i += get_local_size(0);
@@ -859,11 +866,19 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)(
     }
   }
 
+  barrier(CLK_LOCAL_MEM_FENCE);
   if(lid == 0) {
 #pragma unroll
     for(int j = 0; j < rows; ++j) {
-      dstc0[(x_gid * 4  + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)];
-      dstc1[(x_gid * 4  + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)];
+#ifdef ZERO_BETA
+      Dtype a0 = alpha * work_each0[j];
+      Dtype a1 = alpha * work_each1[j];
+#else
+      Dtype a0 = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)];
+      Dtype a1 = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)];
+#endif
+      dstc0[(x_gid * 4  + j)] = a0;
+      dstc1[(x_gid * 4  + j)] = a1;
     }
   }
 }
@@ -952,9 +967,15 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_2,Dtype)(
       }
     }
 
-    if(lid == 0) {
+    if(lid == 0)
+    {
+#ifdef ZERO_BETA
+      dstc0[x_gid] = alpha * work0[0];
+      dstc1[x_gid] = alpha * work1[0];
+#else
       dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
       dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
+#endif
     }
   }
 }
@@ -1058,10 +1079,17 @@ void TEMPLATE(gemm_buffer_NT_M_4_edgerows,Dtype)(
   if(lid == 0) {
 #pragma unroll
     for(int j = 0; j < rows; ++j) {
+#ifdef ZERO_BETA
+      dstc0[(x_gid * 4  + j)] = alpha * work_each0[j];
+      dstc1[(x_gid * 4  + j)] = alpha * work_each1[j];
+      dstc2[(x_gid * 4  + j)] = alpha * work_each2[j];
+      dstc3[(x_gid * 4  + j)] = alpha * work_each3[j];
+#else
       dstc0[(x_gid * 4  + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)];
       dstc1[(x_gid * 4  + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)];
       dstc2[(x_gid * 4  + j)] = alpha * work_each2[j] + beta * dstc2[(x_gid * 4 + j)];
       dstc3[(x_gid * 4  + j)] = alpha * work_each3[j] + beta * dstc3[(x_gid * 4 + j)];
+#endif
     }
   }
 }
@@ -1179,10 +1207,17 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_4,Dtype)(
     }
 
     if(lid == 0) {
+#ifdef ZERO_BETA
+      dstc0[x_gid] = alpha * work0[0];
+      dstc1[x_gid] = alpha * work1[0];
+      dstc2[x_gid] = alpha * work2[0];
+      dstc3[x_gid] = alpha * work3[0];
+#else
       dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
       dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
       dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid];
       dstc3[x_gid] = alpha * work3[0] + beta * dstc3[x_gid];
+#endif
     }
   }
 }
@@ -1320,6 +1355,16 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)(
   }
 
   if(lid == 0) {
+#ifdef ZERO_BETA
+    dstc0[x_gid] = alpha * work0[0];
+    dstc1[x_gid] = alpha * work1[0];
+    dstc2[x_gid] = alpha * work2[0];
+    dstc3[x_gid] = alpha * work3[0];
+    dstc4[x_gid] = alpha * work4[0];
+    dstc5[x_gid] = alpha * work5[0];
+    dstc6[x_gid] = alpha * work6[0];
+    dstc7[x_gid] = alpha * work7[0];
+#else
     dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
     dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
     dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid];
@@ -1328,6 +1373,7 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)(
     dstc5[x_gid] = alpha * work5[0] + beta * dstc5[x_gid];
     dstc6[x_gid] = alpha * work6[0] + beta * dstc6[x_gid];
     dstc7[x_gid] = alpha * work7[0] + beta * dstc7[x_gid];
+#endif
   }
 }
 #undef SLM_SIZE
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 9ba10d4b47..f38ca6700f 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -718,9 +718,6 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
 
 TEST_P(Test_ONNX_layers, GatherMultiOutput)
 {
-    if (cvtest::skipUnstableTests && backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
-        throw SkipTestException("Skip unstable test: https://github.com/opencv/opencv/issues/18937");
-
 #if defined(INF_ENGINE_RELEASE)
     if (target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE);

From c240355cc6cd56083b064866d37ff437ed0eeef3 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Tue, 15 Dec 2020 01:34:20 +0000
Subject: [PATCH 05/10] dnn(ocl): avoid mess FP16/FP32 in convolution layer

---
 modules/core/src/convert.dispatch.cpp         |  2 +-
 modules/core/src/opencl/halfconvert.cl        | 13 ++-
 modules/dnn/src/layers/convolution_layer.cpp  | 26 +++---
 modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp   |  2 -
 .../src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp  | 88 ++++++++++---------
 modules/dnn/src/opencl/conv_spatial_helper.cl |  7 +-
 6 files changed, 77 insertions(+), 61 deletions(-)

diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp
index 4d396b5e99..c32c5bb420 100644
--- a/modules/core/src/convert.dispatch.cpp
+++ b/modules/core/src/convert.dispatch.cpp
@@ -138,7 +138,7 @@ static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int
                               sdepth == CV_32F ? "half" : "float",
                               rowsPerWI,
                               sdepth == CV_32F ? " -D FLOAT_TO_HALF " : "");
-    ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
+    ocl::Kernel k(sdepth == CV_32F ? "convertFp16_FP32_to_FP16" : "convertFp16_FP16_to_FP32", ocl::core::halfconvert_oclsrc, build_opt);
     if (k.empty())
         return false;
 
diff --git a/modules/core/src/opencl/halfconvert.cl b/modules/core/src/opencl/halfconvert.cl
index 506df69faf..9df602f406 100644
--- a/modules/core/src/opencl/halfconvert.cl
+++ b/modules/core/src/opencl/halfconvert.cl
@@ -47,8 +47,17 @@
 #endif
 #endif
 
-__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset,
-                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols)
+__kernel void
+#ifdef FLOAT_TO_HALF
+    convertFp16_FP32_to_FP16
+#else
+    convertFp16_FP16_to_FP32
+#endif
+(
+    __global const uchar * srcptr, int src_step, int src_offset,
+    __global uchar * dstptr, int dst_step, int dst_offset,
+    int dst_rows, int dst_cols
+)
 {
     int x = get_global_id(0);
     int y0 = get_global_id(1) * rowsPerWI;
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 63bd386119..f131c023ab 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1461,16 +1461,7 @@ public:
             umat_blobs.resize(n);
             for (size_t i = 0; i < n; i++)
             {
-                if (use_half)
-                {
-                    Mat matFP32;
-                    convertFp16(inputs[i + 1], matFP32);
-                    matFP32.copyTo(umat_blobs[i]);
-                }
-                else
-                {
-                    inputs[i + 1].copyTo(umat_blobs[i]);
-                }
+                inputs[i + 1].copyTo(umat_blobs[i]);
             }
             inputs.resize(1);
         }
@@ -1481,7 +1472,10 @@ public:
             umat_blobs.resize(n);
             for (size_t i = 0; i < n; i++)
             {
-                blobs[i].copyTo(umat_blobs[i]);
+                if (use_half)
+                    convertFp16(blobs[i], umat_blobs[i]);
+                else
+                    blobs[i].copyTo(umat_blobs[i]);
             }
         }
 
@@ -1537,14 +1531,20 @@ public:
 
         if (fusedWeights)
         {
-            weightsMat.copyTo(umat_blobs[0]);
+            if (use_half)
+                convertFp16(weightsMat, umat_blobs[0]);
+            else
+                weightsMat.copyTo(umat_blobs[0]);
             fusedWeights = false;
         }
         if (fusedBias)
         {
             if ( umat_blobs.size() < 2 )
                 umat_blobs.resize(2);
-            umat_blobs[1] = UMat(biasvec, true);
+            if (use_half)
+                convertFp16(Mat(biasvec, true), umat_blobs[1]);
+            else
+                Mat(biasvec, true).copyTo(umat_blobs[1]);
             convolutionOp->setBias(true);
             fusedBias = false;
         }
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
index 8de7ba26e2..7bb277d102 100644
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -274,8 +274,6 @@ class OCL4DNNConvSpatial
         int32_t group_;
         bool bias_term_;
         UMat swizzled_weights_umat;
-        UMat weights_half;
-        UMat bias_half;
         UMat bottom_data2_;
 
         int32_t bottom_index_;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
index 3707a31846..fd98919343 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -588,16 +588,16 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
         fused_eltwise_ = false;
     }
 
-    if (use_half_ && bias_half.empty() && !bias.empty())
-        convertFp16(bias, bias_half);
+    if (use_half_ && !bias.empty())
+        CV_CheckTypeEQ(bias.type(), CV_16SC1, "");
 
-    if (use_half_ && weights_half.empty())
-        convertFp16(weight, weights_half);
+    if (use_half_)
+        CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
 
-    prepareKernel(bottom, top, weight, (use_half_) ? bias_half : bias, numImages);
+    prepareKernel(bottom, top, weight, bias, numImages);
     if (bestKernelConfig.empty())
         return false;
-    return convolve(bottom, top, weight, (use_half_) ? bias_half : bias, numImages, bestKernelConfig);
+    return convolve(bottom, top, weight, bias, numImages, bestKernelConfig);
 }
 
 template<typename Dtype>
@@ -744,29 +744,26 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
                                      kernel_h_ * (int)alignSize(kernel_w_, 2),
                                      (use_half_) ? CV_16SC1 : CV_32FC1);
 
-    UMat swizzled_weights_tmp;
-    if (use_half_)
-        swizzled_weights_tmp.create(shape(swizzled_weights_umat), CV_32F);
-
     if (!interleave) {
-        cl_uint argIdx = 0;
         int32_t channels = channels_ / group_;
 
-        ocl::Kernel oclk_copy_weight(CL_KERNEL_SELECT("copyWeightsSwizzled"),
-                                     cv::ocl::dnn::conv_spatial_helper_oclsrc);
+        ocl::Kernel oclk_copy_weight(
+            use_half_ ? "copyWeightsSwizzled_half" : "copyWeightsSwizzled_float",
+            cv::ocl::dnn::conv_spatial_helper_oclsrc,
+            use_half_ ? "-DHALF_SUPPORT=1 -DDtype=half" : "-DDtype=float"
+        );
         if (oclk_copy_weight.empty())
             return false;
 
-        oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
-        if (use_half_)
-            oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_tmp));
-        else
-            oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat));
-        oclk_copy_weight.set(argIdx++, kernel_w_);
-        oclk_copy_weight.set(argIdx++, kernel_h_);
-        oclk_copy_weight.set(argIdx++, channels);
-        oclk_copy_weight.set(argIdx++, num_output_);
-        oclk_copy_weight.set(argIdx++, swizzled_factor);
+        oclk_copy_weight.args(
+            ocl::KernelArg::PtrReadOnly(weight),
+            ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat),
+            kernel_w_,
+            kernel_h_,
+            channels,
+            num_output_,
+            swizzled_factor
+        );
 
         size_t global_work_size_copy[3] = {
             (size_t) (alignSize(num_output_, swizzled_factor) * channels * kernel_w_ * kernel_h_), 1, 1 };
@@ -778,13 +775,24 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
         }
     } else {
         // assumption: kernel dimension is 2
-        Mat weightMat = weight.getMat(ACCESS_READ);
-        Dtype* cpu_weight = (Dtype *)weightMat.ptr<float>();
+        Mat weightMat;
         Mat swizzledWeightMat;
+        UMat weight_tmp; // FP32 in half mode, TODO implement FP16 repack
         if (use_half_)
-            swizzledWeightMat = swizzled_weights_tmp.getMat(ACCESS_WRITE);
+        {
+            CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
+            convertFp16(weight, weight_tmp);
+            weightMat = weight_tmp.getMat(ACCESS_READ);
+            swizzledWeightMat.create(shape(swizzled_weights_umat), CV_32F);
+        }
         else
+        {
+            weightMat = weight.getMat(ACCESS_READ);
             swizzledWeightMat = swizzled_weights_umat.getMat(ACCESS_WRITE);
+        }
+
+        CV_CheckTypeEQ(weightMat.type(), CV_32FC1, "");
+        Dtype* cpu_weight = (Dtype *)weightMat.ptr<float>();
         Dtype* cpu_swizzled_weight = (Dtype *)swizzledWeightMat.ptr<float>();
 
         int interleavedRows = (kernel_w_ / 2) * 2;
@@ -792,26 +800,28 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
         int blockWidth = swizzled_factor;  // should equal to simd size.
         int rowAlignment = 32;
         size_t interleaved_filter_size = M_ * kernel_w_ * kernel_h_ * channels_ * sizeof(Dtype);
-        Dtype * tmpSwizzledWeight = reinterpret_cast<Dtype*>(malloc(interleaved_filter_size));
-        CHECK_EQ(tmpSwizzledWeight != NULL, true) << "Failed to allocate temporary swizzled weight";
+        cv::AutoBuffer<Dtype, 0> tmpSwizzledWeight(interleaved_filter_size);
         for (int od = 0; od < M_; od++)
             for (int id = 0; id < channels_; id++)
                 for (int r = 0; r < kernel_h_; r++)
                     for (int c = 0; c < kernel_w_; c++)
                         tmpSwizzledWeight[((id * kernel_h_ + r)* kernel_w_ + c) * M_ + od] =
                             cpu_weight[((od * channels_ + id) * kernel_h_ + r)*kernel_w_+c];
+
         interleaveMatrix(cpu_swizzled_weight,
-                         tmpSwizzledWeight,
+                         tmpSwizzledWeight.data(),
                          kernel_w_ * kernel_h_ * channels_, M_,
                          interleavedRows,
                          nonInterleavedRows,
                          blockWidth,
                          rowAlignment);
-        free(tmpSwizzledWeight);
-    }
 
-    if (use_half_)
-        convertFp16(swizzled_weights_tmp, swizzled_weights_umat);
+        // unmap OpenCL buffers
+        weightMat.release();
+
+        if (use_half_)
+            convertFp16(swizzledWeightMat, swizzled_weights_umat);
+    }
 
     return true;
 }
@@ -1104,10 +1114,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
         cl_uint argIdx = 0;
         setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
         kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
-        if (use_half_)
-            kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half));
-        else
-            kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
+        kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
         if (bias_term_)
             kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
         kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
@@ -1148,10 +1155,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
                 setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
                 kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
                 kernel.set(argIdx++, image_offset);
-                if (use_half_)
-                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half));
-                else
-                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
                 kernel.set(argIdx++, kernel_offset);
                 if (bias_term_)
                     kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
@@ -1956,7 +1960,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
 
     UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1);
 
-    calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
+    calculateBenchmark(bottom, benchData, weight, bias, numImages);
 
     if (run_auto_tuning_ || force_auto_tuning_)
     {
diff --git a/modules/dnn/src/opencl/conv_spatial_helper.cl b/modules/dnn/src/opencl/conv_spatial_helper.cl
index 9d5a89f7b1..33d9db57c8 100644
--- a/modules/dnn/src/opencl/conv_spatial_helper.cl
+++ b/modules/dnn/src/opencl/conv_spatial_helper.cl
@@ -39,9 +39,14 @@
 //
 //M*/
 
+#ifdef HALF_SUPPORT
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16:enable
+#endif
+#endif
+
 #define CONCAT(A,B) A##_##B
 #define TEMPLATE(name,type) CONCAT(name,type)
-#define Dtype float
 
 __kernel void TEMPLATE(copyWeightsSwizzled, Dtype)
     (__global Dtype* weightIn,

From 085a1318016230b877041e30da539f923e13ef68 Mon Sep 17 00:00:00 2001
From: Ian Maquignaz <9im14@queensu.ca>
Date: Fri, 11 Dec 2020 20:16:40 -0500
Subject: [PATCH 06/10] Applied '@ref' linking for 3.4 Calib3D parameters and
 added enum cv::fisheye::CALIB_ZERO_DISPARITY == cv::CALIB_ZERO_DISPARITY ==
 0x400 == 1 << 10. Fisheye test has been updated to use new enum
 cv::fisheye::CALIB_ZERO_DISPARITY and included CV_StaticAssert(...) to ensure
 cv::CALIB_ZERO_DISPARITY == cv::fisheye::CALIB_ZERO_DISPARITY.

---
 modules/calib3d/include/opencv2/calib3d.hpp | 223 ++++++++++----------
 modules/calib3d/test/test_fisheye.cpp       |   8 +-
 2 files changed, 119 insertions(+), 112 deletions(-)

diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index e6cad17ae1..7ec20981be 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -567,15 +567,15 @@ or vector\<Point2f\> .
 a vector\<Point2f\> .
 @param method Method used to compute a homography matrix. The following methods are possible:
 -   **0** - a regular method using all the points, i.e., the least squares method
--   **RANSAC** - RANSAC-based robust method
--   **LMEDS** - Least-Median robust method
--   **RHO** - PROSAC-based robust method
+-   @ref RANSAC - RANSAC-based robust method
+-   @ref LMEDS - Least-Median robust method
+-   @ref RHO - PROSAC-based robust method
 @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier
 (used in the RANSAC and RHO methods only). That is, if
 \f[\| \texttt{dstPoints} _i -  \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2  >  \texttt{ransacReprojThreshold}\f]
 then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels,
 it usually makes sense to set this parameter somewhere in the range of 1 to 10.
-@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input
+@param mask Optional output mask set by a robust method ( RANSAC or LMeDS ). Note that the input
 mask values are ignored.
 @param maxIters The maximum number of RANSAC iterations.
 @param confidence Confidence level, between 0 and 1.
@@ -806,37 +806,37 @@ the model coordinate system to the camera coordinate system.
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param flags Method for solving a PnP problem:
--   **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
+-   @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
 this case the function finds such a pose that minimizes reprojection error, that is the sum
 of squared distances between the observed projections imagePoints and the projected (using
 @ref projectPoints ) objectPoints .
--   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+-   @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
 In this case the function requires exactly four object and image points.
--   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+-   @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 In this case the function requires exactly four object and image points.
--   **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
+-   @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
 paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
--   **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
+-   @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of J. Hesch and S. Roumeliotis.
 "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
--   **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
+-   @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
 F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
 Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
 assuming that both have the same value. Then the cameraMatrix is updated with the estimated
 focal length.
--   **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+-   @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
--   **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+-   @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
 It requires 4 coplanar object points defined in the following order:
   - point 0: [-squareLength / 2,  squareLength / 2, 0]
   - point 1: [ squareLength / 2,  squareLength / 2, 0]
   - point 2: [ squareLength / 2, -squareLength / 2, 0]
   - point 3: [-squareLength / 2, -squareLength / 2, 0]
--   **SOLVEPNP_SQPNP** Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
+-   @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
 Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points.
 
 
@@ -946,23 +946,23 @@ a 3D point expressed in the world frame into the camera frame:
         - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
         it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
         np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
-   -   The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+   -   The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are
        unstable and sometimes give completely wrong results. If you pass one of these two
-       flags, **SOLVEPNP_EPNP** method will be used instead.
-   -   The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+       flags, @ref SOLVEPNP_EPNP method will be used instead.
+   -   The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P
        methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
        of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
-   -   With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+   -   With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
        are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
        global solution to converge.
-   -   With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
-   -   With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+   -   With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar.
+   -   With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation.
        Number of input points must be 4. Object points must be defined in the following order:
          - point 0: [-squareLength / 2,  squareLength / 2, 0]
          - point 1: [ squareLength / 2,  squareLength / 2, 0]
          - point 2: [ squareLength / 2, -squareLength / 2, 0]
          - point 3: [-squareLength / 2, -squareLength / 2, 0]
-    -  With **SOLVEPNP_SQPNP** input points must be >= 3
+    -  With @ref SOLVEPNP_SQPNP input points must be >= 3
  */
 CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
                             InputArray cameraMatrix, InputArray distCoeffs,
@@ -1031,9 +1031,9 @@ assumed.
 the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
 @param tvecs Output translation vectors.
 @param flags Method for solving a P3P problem:
--   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+-   @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
--   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis.
+-   @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke and S. Roumeliotis.
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 
 The function estimates the object pose given 3 object points, their corresponding image
@@ -1133,39 +1133,39 @@ the model coordinate system to the camera coordinate system.
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param flags Method for solving a PnP problem:
--   **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
+-   @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
 this case the function finds such a pose that minimizes reprojection error, that is the sum
 of squared distances between the observed projections imagePoints and the projected (using
 projectPoints ) objectPoints .
--   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+-   @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
 In this case the function requires exactly four object and image points.
--   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+-   @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 In this case the function requires exactly four object and image points.
--   **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
+-   @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
 paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
--   **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
+-   @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
 "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
--   **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
+-   @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
 F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
 Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
 assuming that both have the same value. Then the cameraMatrix is updated with the estimated
 focal length.
--   **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+-   @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
--   **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+-   @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
 It requires 4 coplanar object points defined in the following order:
   - point 0: [-squareLength / 2,  squareLength / 2, 0]
   - point 1: [ squareLength / 2,  squareLength / 2, 0]
   - point 2: [ squareLength / 2, -squareLength / 2, 0]
   - point 3: [-squareLength / 2, -squareLength / 2, 0]
-@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
 and useExtrinsicGuess is set to true.
-@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
 and useExtrinsicGuess is set to true.
 @param reprojectionError Optional vector of reprojection error, that is the RMS error
 (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
@@ -1277,17 +1277,17 @@ a 3D point expressed in the world frame into the camera frame:
         - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
         it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
         np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
-   -   The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+   -   The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are
        unstable and sometimes give completely wrong results. If you pass one of these two
-       flags, **SOLVEPNP_EPNP** method will be used instead.
-   -   The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+       flags, @ref SOLVEPNP_EPNP method will be used instead.
+   -   The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P
        methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
        of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
-   -   With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+   -   With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
        are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
        global solution to converge.
-   -   With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
-   -   With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+   -   With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar.
+   -   With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation.
        Number of input points must be 4. Object points must be defined in the following order:
          - point 0: [-squareLength / 2,  squareLength / 2, 0]
          - point 1: [ squareLength / 2,  squareLength / 2, 0]
@@ -1327,13 +1327,13 @@ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints,
 ( patternSize = cvSize(points_per_row,points_per_colum) = cvSize(columns,rows) ).
 @param corners Output array of detected corners.
 @param flags Various operation flags that can be zero or a combination of the following values:
--   **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black
+-   @ref CALIB_CB_ADAPTIVE_THRESH Use adaptive thresholding to convert the image to black
 and white, rather than a fixed threshold level (computed from the average image brightness).
--   **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before
+-   @ref CALIB_CB_NORMALIZE_IMAGE Normalize the image gamma with equalizeHist before
 applying fixed or adaptive thresholding.
--   **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter,
+-   @ref CALIB_CB_FILTER_QUADS Use additional criteria (like contour area, perimeter,
 square-like shape) to filter out false quads extracted at the contour retrieval stage.
--   **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners,
+-   @ref CALIB_CB_FAST_CHECK Run a fast check on the image that looks for chessboard corners,
 and shortcut the call if none is found. This can drastically speed up the call in the
 degenerate condition when no chessboard is observed.
 
@@ -1448,9 +1448,9 @@ struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters2 : public CirclesGridFind
 ( patternSize = Size(points_per_row, points_per_colum) ).
 @param centers output array of detected centers.
 @param flags various operation flags that can be one of the following values:
--   **CALIB_CB_SYMMETRIC_GRID** uses symmetric pattern of circles.
--   **CALIB_CB_ASYMMETRIC_GRID** uses asymmetric pattern of circles.
--   **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to
+-   @ref CALIB_CB_SYMMETRIC_GRID uses symmetric pattern of circles.
+-   @ref CALIB_CB_ASYMMETRIC_GRID uses asymmetric pattern of circles.
+-   @ref CALIB_CB_CLUSTERING uses a special algorithm for grid detection. It is more robust to
 perspective distortions but much more sensitive to background clutter.
 @param blobDetector feature detector that finds blobs like dark circles on light background.
                     If `blobDetector` is NULL then `image` represents Point2f array of candidates.
@@ -1464,7 +1464,7 @@ row). Otherwise, if the function fails to find all the corners or reorder them,
 Sample usage of detecting and drawing the centers of circles: :
 @code
     Size patternsize(7,7); //number of centers
-    Mat gray = ....; //source image
+    Mat gray = ...; //source image
     vector<Point2f> centers; //this will be filled by the detected centers
 
     bool patternfound = findCirclesGrid(gray, patternsize, centers);
@@ -1509,8 +1509,8 @@ respectively. In the old interface all the vectors of object points from differe
 concatenated together.
 @param imageSize Size of the image used only to initialize the camera intrinsic matrix.
 @param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix
-\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
-and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+\f$\cameramatrix{A}\f$ . If @ref CV_CALIB_USE_INTRINSIC_GUESS
+and/or @ref CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
 initialized before calling the function.
 @param distCoeffs Input/output vector of distortion coefficients
 \f$\distcoeffs\f$.
@@ -1533,40 +1533,40 @@ parameters. Order of deviations values: \f$(R_0, T_0, \dotsc , R_{M - 1}, T_{M -
 the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors.
  @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
--   **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+-   @ref CALIB_USE_INTRINSIC_GUESS cameraMatrix contains valid initial values of
 fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
 center ( imageSize is used), and focal distances are computed in a least-squares fashion.
 Note, that if intrinsic parameters are known, there is no need to use this function just to
 estimate extrinsic parameters. Use solvePnP instead.
--   **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+-   @ref CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global
 optimization. It stays at the center or at a different location specified when
-CALIB_USE_INTRINSIC_GUESS is set too.
--   **CALIB_FIX_ASPECT_RATIO** The functions consider only fy as a free parameter. The
+ @ref CALIB_USE_INTRINSIC_GUESS is set too.
+-   @ref CALIB_FIX_ASPECT_RATIO The functions consider only fy as a free parameter. The
 ratio fx/fy stays the same as in the input cameraMatrix . When
-CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
+ @ref CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
 ignored, only their ratio is computed and used further.
--   **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
+-   @ref CALIB_ZERO_TANGENT_DIST Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
 to zeros and stay zero.
--   **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion
-coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is
+-   @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 The corresponding radial distortion
+coefficient is not changed during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is
 set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the
+-   @ref CALIB_RATIONAL_MODEL Coefficients k4, k5, and k6 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the rational model and return 8 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
--   **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+-   @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
--   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+-   @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+-   @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
--   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+-   @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
 
@@ -1578,7 +1578,7 @@ points and their corresponding 2D projections in each view must be specified. Th
 by using an object with known geometry and easily detectable feature points. Such an object is
 called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as
 a calibration rig (see @ref findChessboardCorners). Currently, initialization of intrinsic
-parameters (when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
+parameters (when @ref CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
 patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also
 be used as long as initial cameraMatrix is provided.
 
@@ -1689,39 +1689,39 @@ second camera coordinate system.
 @param F Output fundamental matrix.
 @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
--   **CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F
+-   @ref CALIB_FIX_INTRINSIC Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F
 matrices are estimated.
--   **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters
+-   @ref CALIB_USE_INTRINSIC_GUESS Optimize some or all of the intrinsic parameters
 according to the specified flags. Initial values are provided by the user.
--   **CALIB_USE_EXTRINSIC_GUESS** R and T contain valid initial values that are optimized further.
+-   @ref CALIB_USE_EXTRINSIC_GUESS R and T contain valid initial values that are optimized further.
 Otherwise R and T are initialized to the median value of the pattern views (each dimension separately).
--   **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization.
--   **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
--   **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
+-   @ref CALIB_FIX_PRINCIPAL_POINT Fix the principal points during the optimization.
+-   @ref CALIB_FIX_FOCAL_LENGTH Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+-   @ref CALIB_FIX_ASPECT_RATIO Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
 .
--   **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
--   **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to
+-   @ref CALIB_SAME_FOCAL_LENGTH Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
+-   @ref CALIB_ZERO_TANGENT_DIST Set tangential distortion coefficients for each camera to
 zeros and fix there.
--   **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial
-distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set,
+-   @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 Do not change the corresponding radial
+distortion coefficient during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set,
 the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward
+-   @ref CALIB_RATIONAL_MODEL Enable coefficients k4, k5, and k6. To provide the backward
 compatibility, this extra flag should be explicitly specified to make the calibration
 function use the rational model and return 8 coefficients. If the flag is not set, the
 function computes and returns only 5 distortion coefficients.
--   **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+-   @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
--   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+-   @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+-   @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
--   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+-   @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
 
@@ -1769,10 +1769,10 @@ Besides the stereo-related information, the function can also perform a full cal
 the two cameras. However, due to the high dimensionality of the parameter space and noise in the
 input data, the function can diverge from the correct solution. If the intrinsic parameters can be
 estimated with high accuracy for each of the cameras individually (for example, using
-calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the
+calibrateCamera ), you are recommended to do so and then pass @ref CALIB_FIX_INTRINSIC flag to the
 function along with the computed intrinsic parameters. Otherwise, if all the parameters are
 estimated at once, it makes sense to restrict some parameters, for example, pass
-CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a
+ @ref CALIB_SAME_FOCAL_LENGTH and @ref CALIB_ZERO_TANGENT_DIST flags, which is usually a
 reasonable assumption.
 
 Similarly to calibrateCamera, the function minimizes the total re-projection error for all the
@@ -1822,7 +1822,7 @@ rectified first camera's image.
 camera, i.e. it projects points given in the rectified first camera coordinate system into the
 rectified second camera's image.
 @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see @ref reprojectImageTo3D).
-@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+@param flags Operation flags that may be zero or @ref CALIB_ZERO_DISPARITY . If the flag is set,
 the function makes the principal points of each camera have the same pixel coordinates in the
 rectified views. And if the flag is not set, the function may still shift the images in the
 horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
@@ -1869,7 +1869,7 @@ coordinates. The function distinguishes the following two cases:
                      \end{bmatrix} ,\f]
 
     where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if
-    CALIB_ZERO_DISPARITY is set.
+    @ref CALIB_ZERO_DISPARITY is set.
 
 -   **Vertical stereo**: the first and the second camera views are shifted relative to each other
     mainly in the vertical direction (and probably a bit in the horizontal direction too). The epipolar
@@ -1888,7 +1888,7 @@ coordinates. The function distinguishes the following two cases:
                      \end{bmatrix},\f]
 
     where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if
-    CALIB_ZERO_DISPARITY is set.
+    @ref CALIB_ZERO_DISPARITY is set.
 
 As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera
 matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to
@@ -2231,8 +2231,8 @@ same camera intrinsic matrix. If this assumption does not hold for your use case
 to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When
 passing these coordinates, pass the identity matrix for this parameter.
 @param method Method for computing an essential matrix.
--   **RANSAC** for the RANSAC algorithm.
--   **LMEDS** for the LMedS algorithm.
+-   @ref RANSAC for the RANSAC algorithm.
+-   @ref LMEDS for the LMedS algorithm.
 @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of
 confidence (probability) that the estimated matrix is correct.
 @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
@@ -2264,8 +2264,8 @@ be floating-point (single or double precision).
 are feature points from cameras with same focal length and principal point.
 @param pp principal point of the camera.
 @param method Method for computing a fundamental matrix.
--   **RANSAC** for the RANSAC algorithm.
--   **LMEDS** for the LMedS algorithm.
+-   @ref RANSAC for the RANSAC algorithm.
+-   @ref LMEDS for the LMedS algorithm.
 @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
 line in pixels, beyond which the point is considered an outlier and is not used for computing the
 final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
@@ -2668,8 +2668,8 @@ b_2\\
 @param to Second input 2D point set containing \f$(x,y)\f$.
 @param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
 @param method Robust method used to compute transformation. The following methods are possible:
--   cv::RANSAC - RANSAC-based robust method
--   cv::LMEDS - Least-Median robust method
+-   @ref RANSAC - RANSAC-based robust method
+-   @ref LMEDS - Least-Median robust method
 RANSAC is the default method.
 @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
 a point as an inlier. Applies only to RANSAC.
@@ -2714,8 +2714,8 @@ two 2D point sets.
 @param to Second input 2D point set.
 @param inliers Output vector indicating which points are inliers.
 @param method Robust method used to compute transformation. The following methods are possible:
--   cv::RANSAC - RANSAC-based robust method
--   cv::LMEDS - Least-Median robust method
+-   @ref RANSAC - RANSAC-based robust method
+-   @ref LMEDS - Least-Median robust method
 RANSAC is the default method.
 @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
 a point as an inlier. Applies only to RANSAC.
@@ -3012,7 +3012,8 @@ namespace fisheye
         CALIB_FIX_K3                = 1 << 6,
         CALIB_FIX_K4                = 1 << 7,
         CALIB_FIX_INTRINSIC         = 1 << 8,
-        CALIB_FIX_PRINCIPAL_POINT   = 1 << 9
+        CALIB_FIX_PRINCIPAL_POINT   = 1 << 9,
+        CALIB_ZERO_DISPARITY        = 1 << 10
     };
 
     /** @brief Projects points using fisheye model
@@ -3145,7 +3146,7 @@ namespace fisheye
     @param image_size Size of the image used only to initialize the camera intrinsic matrix.
     @param K Output 3x3 floating-point camera intrinsic matrix
     \f$\cameramatrix{A}\f$ . If
-    fisheye::CALIB_USE_INTRINSIC_GUESS/ is specified, some or all of fx, fy, cx, cy must be
+    @ref fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be
     initialized before calling the function.
     @param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$.
     @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view.
@@ -3155,17 +3156,17 @@ namespace fisheye
     position of the calibration pattern in the k-th pattern view (k=0.. *M* -1).
     @param tvecs Output vector of translation vectors estimated for each pattern view.
     @param flags Different flags that may be zero or a combination of the following values:
-    -   **fisheye::CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+    -    @ref fisheye::CALIB_USE_INTRINSIC_GUESS  cameraMatrix contains valid initial values of
     fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
     center ( imageSize is used), and focal distances are computed in a least-squares fashion.
-    -   **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration
+    -    @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC  Extrinsic will be recomputed after each iteration
     of intrinsic optimization.
-    -   **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number.
-    -   **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero.
-    -   **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients
+    -    @ref fisheye::CALIB_CHECK_COND  The functions will check validity of condition number.
+    -    @ref fisheye::CALIB_FIX_SKEW  Skew coefficient (alpha) is set to zero and stay zero.
+    -    @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients
     are set to zeros and stay zero.
-    -   **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
-optimization. It stays at the center or at a different location specified when CALIB_USE_INTRINSIC_GUESS is set too.
+    -    @ref fisheye::CALIB_FIX_PRINCIPAL_POINT  The principal point is not changed during the global
+optimization. It stays at the center or at a different location specified when @ref fisheye::CALIB_USE_INTRINSIC_GUESS is set too.
     @param criteria Termination criteria for the iterative optimization algorithm.
      */
     CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size,
@@ -3189,7 +3190,7 @@ optimization. It stays at the center or at a different location specified when C
     @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
     camera.
     @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
-    @param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+    @param flags Operation flags that may be zero or @ref fisheye::CALIB_ZERO_DISPARITY . If the flag is set,
     the function makes the principal points of each camera have the same pixel coordinates in the
     rectified views. And if the flag is not set, the function may still shift the images in the
     horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
@@ -3215,7 +3216,7 @@ optimization. It stays at the center or at a different location specified when C
     observed by the second camera.
     @param K1 Input/output first camera intrinsic matrix:
     \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
-    any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified,
+    any of @ref fisheye::CALIB_USE_INTRINSIC_GUESS , @ref fisheye::CALIB_FIX_INTRINSIC are specified,
     some or all of the matrix components must be initialized.
     @param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements.
     @param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 .
@@ -3225,16 +3226,16 @@ optimization. It stays at the center or at a different location specified when C
     @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
     @param T Output translation vector between the coordinate systems of the cameras.
     @param flags Different flags that may be zero or a combination of the following values:
-    -   **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices
+    -    @ref fisheye::CALIB_FIX_INTRINSIC  Fix K1, K2? and D1, D2? so that only R, T matrices
     are estimated.
-    -   **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contains valid initial values of
+    -    @ref fisheye::CALIB_USE_INTRINSIC_GUESS  K1, K2 contains valid initial values of
     fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
     center (imageSize is used), and focal distances are computed in a least-squares fashion.
-    -   **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration
+    -    @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC  Extrinsic will be recomputed after each iteration
     of intrinsic optimization.
-    -   **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number.
-    -   **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero.
-    -   **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay
+    -    @ref fisheye::CALIB_CHECK_COND  The functions will check validity of condition number.
+    -    @ref fisheye::CALIB_FIX_SKEW  Skew coefficient (alpha) is set to zero and stay zero.
+    -   @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients are set to zeros and stay
     zero.
     @param criteria Termination criteria for the iterative optimization algorithm.
      */
diff --git a/modules/calib3d/test/test_fisheye.cpp b/modules/calib3d/test/test_fisheye.cpp
index 8e509cf35e..58a79dcc88 100644
--- a/modules/calib3d/test/test_fisheye.cpp
+++ b/modules/calib3d/test/test_fisheye.cpp
@@ -390,6 +390,12 @@ TEST_F(fisheyeTest, EstimateUncertainties)
 
 TEST_F(fisheyeTest, stereoRectify)
 {
+    // For consistency purposes
+    CV_StaticAssert(
+        static_cast<int>(cv::CALIB_ZERO_DISPARITY) == static_cast<int>(cv::fisheye::CALIB_ZERO_DISPARITY),
+        "For the purpose of continuity the following should be true: cv::CALIB_ZERO_DISPARITY == cv::fisheye::CALIB_ZERO_DISPARITY"
+    );
+
     const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY");
 
     cv::Size calibration_size = this->imageSize, requested_size = calibration_size;
@@ -402,7 +408,7 @@ TEST_F(fisheyeTest, stereoRectify)
     double balance = 0.0, fov_scale = 1.1;
     cv::Mat R1, R2, P1, P2, Q;
     cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, theR, theT, R1, R2, P1, P2, Q,
-                      cv::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale);
+                      cv::fisheye::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale);
 
     // Collected with these CMake flags: -DWITH_IPP=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_DISABLE_OPTIMIZATION=ON -DCMAKE_BUILD_TYPE=Debug
     cv::Matx33d R1_ref(

From 752cc26ad62c4cc3e4897aebff65bc4017c32922 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Wed, 16 Dec 2020 22:22:17 +0000
Subject: [PATCH 07/10] dnn: use OpenVINO 2021.2 defines

original commit: 4699d2ba0c5447516ac89dc732704028838a3f86
---
 cmake/OpenCVDetectInferenceEngine.cmake | 4 ++--
 modules/dnn/src/op_inf_engine.hpp       | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake
index ceb7b7989c..9af1051fa0 100644
--- a/cmake/OpenCVDetectInferenceEngine.cmake
+++ b/cmake/OpenCVDetectInferenceEngine.cmake
@@ -135,9 +135,9 @@ endif()
 
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version has not been set, 2021.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version has not been set, 2021.2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2021010000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
+  set(INF_ENGINE_RELEASE "2021020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
     INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )
diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp
index bb9563f4ac..f29af3e0b1 100644
--- a/modules/dnn/src/op_inf_engine.hpp
+++ b/modules/dnn/src/op_inf_engine.hpp
@@ -28,10 +28,11 @@
 #define INF_ENGINE_RELEASE_2020_3 2020030000
 #define INF_ENGINE_RELEASE_2020_4 2020040000
 #define INF_ENGINE_RELEASE_2021_1 2021010000
+#define INF_ENGINE_RELEASE_2021_2 2021020000
 
 #ifndef INF_ENGINE_RELEASE
-#warning("IE version have not been provided via command-line. Using 2021.1 by default")
-#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_1
+#warning("IE version have not been provided via command-line. Using 2021.2 by default")
+#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_2
 #endif
 
 #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))

From 28aab134dbecd7bcfe154548e17afc2a1710798a Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Thu, 17 Dec 2020 07:53:35 +0000
Subject: [PATCH 08/10] dnn(test): update tests for OpenVINO 2021.2

---
 modules/dnn/perf/perf_net.cpp              | 10 +++++-----
 modules/dnn/test/test_darknet_importer.cpp |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp
index 600193915d..929beb2ec5 100644
--- a/modules/dnn/perf/perf_net.cpp
+++ b/modules/dnn/perf/perf_net.cpp
@@ -206,7 +206,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
         throw SkipTestException("Test is disabled in OpenVINO 2020.4");
 #endif
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000)  // nGraph compilation failure
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)  // nGraph compilation failure
     if (target == DNN_TARGET_MYRIAD)
         throw SkipTestException("");
 #endif
@@ -241,7 +241,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
 {
     if (backend == DNN_BACKEND_HALIDE)
         throw SkipTestException("");
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000)  // nGraph compilation failure
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)  // nGraph compilation failure
     if (target == DNN_TARGET_MYRIAD)
         throw SkipTestException("");
 #endif
@@ -276,9 +276,9 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
         throw SkipTestException("Test is disabled in OpenVINO 2019R2");
 #endif
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
-        throw SkipTestException("Test is disabled in OpenVINO 2021.1 / MYRIAD");
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)
+    if (target == DNN_TARGET_MYRIAD)
+        throw SkipTestException("Test is disabled in OpenVINO 2021.1+ / MYRIAD");
 #endif
     if (backend == DNN_BACKEND_HALIDE ||
         (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) ||
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index a47e771084..0ecec8c49b 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -625,7 +625,7 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
         target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB
     );
 
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000)  // nGraph compilation failure
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)  // nGraph compilation failure
     if (target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif

From ec3ef520e67b0d6c166e0f83bfa14130cf2ee135 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Wed, 16 Dec 2020 17:27:55 +0100
Subject: [PATCH 09/10] Move big objects (>20k) from stack to heap.

---
 modules/imgproc/src/clahe.cpp    | 4 +++-
 modules/imgproc/src/imgwarp.cpp  | 3 ++-
 modules/imgproc/src/pyramids.cpp | 6 +++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp
index 45658ef561..677b6a0738 100644
--- a/modules/imgproc/src/clahe.cpp
+++ b/modules/imgproc/src/clahe.cpp
@@ -162,7 +162,9 @@ namespace
 
             // calc histogram
 
-            int tileHist[histSize] = {0, };
+            cv::AutoBuffer<int> _tileHist(histSize);
+            int* tileHist = _tileHist.data();
+            std::fill(tileHist, tileHist + histSize, 0);
 
             int height = tileROI.height;
             const size_t sstep = src_.step / sizeof(T);
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 1575ffb915..1665e0a40c 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -2167,7 +2167,8 @@ public:
     virtual void operator() (const Range& range) const CV_OVERRIDE
     {
         const int BLOCK_SZ = 64;
-        short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
+        AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
+        short *XY = __XY.data(), *A = __A.data();
         const int AB_BITS = MAX(10, (int)INTER_BITS);
         const int AB_SCALE = 1 << AB_BITS;
         int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index ab6c8fdb6f..4f7732925c 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -750,9 +750,9 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
     Size ssize = _src.size(), dsize = _dst.size();
     int cn = _src.channels();
 
-    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
-    AutoBuffer<int> _tabM(dsize.width*cn);
-    int* tabM = _tabM.data();
+    AutoBuffer<int> _tabM(dsize.width * cn), _tabL(cn * (PD_SZ + 2)),
+        _tabR(cn * (PD_SZ + 2));
+    int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();
 
     CV_Assert( ssize.width > 0 && ssize.height > 0 &&
                std::abs(dsize.width*2 - ssize.width) <= 2 &&

From 8391a2360080571d15e1b4ad5dc383672105750b Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Wed, 16 Dec 2020 16:06:58 +0100
Subject: [PATCH 10/10] Optimize calls to std::string::find() and friends for a
 single char.

The character literal overload is more efficient. More info at:

http://clang.llvm.org/extra/clang-tidy/checks/performance-faster-string-find.html
---
 modules/core/test/test_misc.cpp            | 2 +-
 modules/dnn/src/darknet/darknet_io.cpp     | 2 +-
 modules/dnn/src/tensorflow/tf_importer.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp
index 3934ceb716..372aab7eb0 100644
--- a/modules/core/test/test_misc.cpp
+++ b/modules/core/test/test_misc.cpp
@@ -189,7 +189,7 @@ TEST(Core_OutputArrayCreate, _13772)
 TEST(Core_String, find_last_of__with__empty_string)
 {
     cv::String s;
-    size_t p = s.find_last_of("q", 0);
+    size_t p = s.find_last_of('q', 0);
     // npos is not exported: EXPECT_EQ(cv::String::npos, p);
     EXPECT_EQ(std::string::npos, p);
 }
diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index c745d5f036..f6d71fd6d4 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -620,7 +620,7 @@ namespace cv {
                             // read section
                             read_net = false;
                             ++layers_counter;
-                            const size_t layer_type_size = line.find("]") - 1;
+                            const size_t layer_type_size = line.find(']') - 1;
                             CV_Assert(layer_type_size < line.size());
                             std::string layer_type = line.substr(1, layer_type_size);
                             net->layers_cfg[layers_counter]["layer_type"] = layer_type;
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 13e1f40cd6..493f0ec305 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -389,7 +389,7 @@ Pin parsePin(const std::string &name)
 {
     Pin pin(name);
 
-    size_t delimiter_pos = name.find_first_of(":");
+    size_t delimiter_pos = name.find_first_of(':');
     if (delimiter_pos != std::string::npos)
     {
         pin.name = name.substr(0, delimiter_pos);