From 5cfa44d2a21717dbf70af1e40048f37e66917cb0 Mon Sep 17 00:00:00 2001
From: xndcn <xndchn@gmail.com>
Date: Fri, 21 Jun 2024 21:04:22 +0800
Subject: [PATCH 01/39] photo: doc: Fix window range for
 fastNlMeansDenoisingMulti

---
 modules/photo/include/opencv2/photo.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp
index 392232851a..1c8a316b52 100644
--- a/modules/photo/include/opencv2/photo.hpp
+++ b/modules/photo/include/opencv2/photo.hpp
@@ -209,7 +209,7 @@ size.
 @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
 @param temporalWindowSize Number of surrounding images to use for target image denoising. Should
 be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
-imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
 srcImgs[imgToDenoiseIndex] image.
 @param dst Output image with the same size and type as srcImgs images.
 @param templateWindowSize Size in pixels of the template patch that is used to compute weights.
@@ -236,7 +236,7 @@ have the same type and size.
 @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
 @param temporalWindowSize Number of surrounding images to use for target image denoising. Should
 be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
-imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
 srcImgs[imgToDenoiseIndex] image.
 @param dst Output image with the same size and type as srcImgs images.
 @param templateWindowSize Size in pixels of the template patch that is used to compute weights.
@@ -263,7 +263,7 @@ size.
 @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
 @param temporalWindowSize Number of surrounding images to use for target image denoising. Should
 be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
-imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
 srcImgs[imgToDenoiseIndex] image.
 @param dst Output image with the same size and type as srcImgs images.
 @param templateWindowSize Size in pixels of the template patch that is used to compute weights.

From 3d74d646d8c4c48e400e650fef9463f174414b96 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Mon, 1 Jul 2024 17:33:24 +0300
Subject: [PATCH 02/39] Fixed CuDNN runtime version check for CuDNN 9+.

---
 modules/dnn/src/cuda4dnn/init.hpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/modules/dnn/src/cuda4dnn/init.hpp b/modules/dnn/src/cuda4dnn/init.hpp
index f5bb7714f8..a4177fce4f 100644
--- a/modules/dnn/src/cuda4dnn/init.hpp
+++ b/modules/dnn/src/cuda4dnn/init.hpp
@@ -23,8 +23,19 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         //     Any patch release x.y.z is forward or backward-compatible with applications built against another cuDNN patch release x.y.w (meaning, of the same major and minor version number, but having w!=z).
         //     cuDNN minor releases beginning with cuDNN 7 are binary backward-compatible with applications built against the same or earlier patch release (meaning, an application built against cuDNN 7.x is binary compatible with cuDNN library 7.y, where y>=x).
         //     Applications compiled with a cuDNN version 7.y are not guaranteed to work with 7.x release when y > x.
-        auto cudnn_bversion = cudnnGetVersion();
-        auto cudnn_major_bversion = cudnn_bversion / 1000, cudnn_minor_bversion = cudnn_bversion % 1000 / 100;
+        int cudnn_bversion = cudnnGetVersion();
+        int cudnn_major_bversion = 0, cudnn_minor_bversion = 0;
+        // CuDNN changed major version multiplier in 9.0
+        if (cudnn_bversion >= 9*10000)
+        {
+            cudnn_major_bversion = cudnn_bversion / 10000;
+            cudnn_minor_bversion = cudnn_bversion % 10000 / 100;
+        }
+        else
+        {
+            cudnn_major_bversion = cudnn_bversion / 1000;
+            cudnn_minor_bversion = cudnn_bversion % 1000 / 100;
+        }
         if (cudnn_major_bversion != CUDNN_MAJOR || cudnn_minor_bversion < CUDNN_MINOR)
         {
             std::ostringstream oss;

From fd5efabdd9b0cfade1ce6aa9db7b46b581b794d4 Mon Sep 17 00:00:00 2001
From: Dietmar Schabus <dschabus@veritone.com>
Date: Tue, 2 Jul 2024 06:45:05 +0200
Subject: [PATCH 03/39] Don't rely on nb_frames to be correct

---
 modules/videoio/src/cap_ffmpeg_impl.hpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index 0546fbdfbe..1a4aa36d22 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -1485,10 +1485,6 @@ bool CvCapture_FFMPEG::grabFrame()
 
     if( !ic || !video_st || (!rawMode && !context) )  return false;
 
-    if( ic->streams[video_stream]->nb_frames > 0 &&
-        frame_number > ic->streams[video_stream]->nb_frames )
-        return false;
-
     picture_pts = AV_NOPTS_VALUE_;
 
 #if USE_AV_INTERRUPT_CALLBACK

From 883faf88711a035cc5fafe48eaa34ec8db14d29f Mon Sep 17 00:00:00 2001
From: kaingwade <jiawu83@gmail.com>
Date: Tue, 2 Jul 2024 17:23:20 +0800
Subject: [PATCH 04/39] Set using Orbbec SDK on MacOS OFF by default.

---
 modules/videoio/cmake/detect_obsensor.cmake | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/modules/videoio/cmake/detect_obsensor.cmake b/modules/videoio/cmake/detect_obsensor.cmake
index cf5a9063a9..f0b66015eb 100644
--- a/modules/videoio/cmake/detect_obsensor.cmake
+++ b/modules/videoio/cmake/detect_obsensor.cmake
@@ -1,10 +1,5 @@
 # --- obsensor ---
 if(NOT HAVE_OBSENSOR)
-  if(APPLE)
-    # force to use orbbec sdk on mac
-    set(OBSENSOR_USE_ORBBEC_SDK ON)
-  endif()
-
   if(OBSENSOR_USE_ORBBEC_SDK)
     include(${CMAKE_SOURCE_DIR}/3rdparty/orbbecsdk/orbbecsdk.cmake)
     download_orbbec_sdk(ORBBEC_SDK_ROOT_DIR)

From 6e1864e3fccde783626921e5776c5fc5edd56c74 Mon Sep 17 00:00:00 2001
From: Wanli <wanli.zhong.1999@gmail.com>
Date: Tue, 2 Jul 2024 17:32:49 +0800
Subject: [PATCH 05/39] Merge pull request #24941 from WanliZhong:v_exp

Add support for v_exp (exponential) #24941

This PR aims to implement `v_exp(v_float16 x)`, `v_exp(v_float32 x)` and `v_exp(v_float64 x)`.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../core/include/opencv2/core/hal/intrin.hpp  |   1 +
 .../include/opencv2/core/hal/intrin_cpp.hpp   |  27 ++-
 .../include/opencv2/core/hal/intrin_math.hpp  | 200 ++++++++++++++++++
 modules/core/test/test_intrin_utils.hpp       | 101 ++++++++-
 .../dnn/src/layers/cpu_kernels/softmax.cpp    |  40 +---
 5 files changed, 328 insertions(+), 41 deletions(-)
 create mode 100644 modules/core/include/opencv2/core/hal/intrin_math.hpp

diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index 27beccd9ab..9a95b3a67b 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -1239,6 +1239,7 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD 0
 #endif
 
+#include "intrin_math.hpp"
 #include "simd_utils.impl.hpp"
 
 #ifndef CV_DOXYGEN
diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
index 8619fec60c..40979f6c4b 100644
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@@ -263,7 +263,7 @@ Most of these operations return only one value.
 
 ### Other math
 
-- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
+- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp
 - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
 
 ### Conversions
@@ -363,6 +363,7 @@ Floating point:
 |reverse            | x | x |
 |extract_n          | x | x |
 |broadcast_element  | x |   |
+|exp                | x | x |
 
  @{ */
 
@@ -721,11 +722,33 @@ template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a)
 Only for floating point types.*/
 OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
 
+/**
+ * @brief Exponential \f$ e^x \f$ of elements
+ *
+ * Only for floating point types. Core implementation steps:
+ * 1. Decompose Input: Convert the input to \f$ 2^{x \cdot \log_2e} \f$ and split its exponential into integer and fractional parts:
+ *    \f$ x \cdot \log_2e = n + f \f$, where \f$ n \f$ is the integer part and \f$ f \f$ is the fractional part.
+ * 2. Compute \f$ 2^n \f$: Calculated by shifting the bits.
+ * 3. Adjust Fractional Part: Compute \f$ f \cdot \ln2 \f$ to convert the fractional part to base \f$ e \f$.
+ *    \f$ C1 \f$ and \f$ C2 \f$ are used to adjust the fractional part.
+ * 4. Polynomial Approximation for \f$ e^{f \cdot \ln2} \f$: The closer the fractional part is to 0, the more accurate the result.
+ *    - For float16 and float32, use a Taylor Series with 6 terms.
+ *    - For float64, use Pade Polynomials Approximation with 4 terms.
+ * 5. Combine Results: Multiply the two parts together to get the final result:
+ *    \f$ e^x = 2^n \cdot e^{f \cdot \ln2} \f$.
+ *
+ * @note The precision of the calculation depends on the implementation and the data type of the input vector.
+ */
+OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+#define OPENCV_HAL_MATH_HAVE_EXP 1
+
 //! @cond IGNORED
 OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
+#define OPENCV_HAL_MATH_HAVE_SIN 1
 OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
-OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+#define OPENCV_HAL_MATH_HAVE_COS 1
 OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+#define OPENCV_HAL_MATH_HAVE_LOG 1
 //! @endcond
 
 /** @brief Absolute value of elements
diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp
new file mode 100644
index 0000000000..528166889b
--- /dev/null
+++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp
@@ -0,0 +1,200 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
+#ifdef OPENCV_HAL_INTRIN_HPP  // defined in intrin.hpp
+
+namespace CV__SIMD_NAMESPACE {
+
+/* Universal Intrinsics implementation of sin, cos, exp and log
+
+   Inspired by Intel Approximate Math library, and based on the
+   corresponding algorithms of the cephes math library
+*/
+
+/* Copyright (C) 2010,2011  RJVB - extensions */
+/* Copyright (C) 2011  Julien Pommier
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  (this is the zlib license)
+*/
+
+#ifndef OPENCV_HAL_MATH_HAVE_EXP
+
+//! @name Exponential
+//! @{
+#if defined(CV_SIMD_FP16) && CV_SIMD_FP16
+    // Implementation is the same as float32 vector.
+    inline v_float16 v_exp(const v_float16 &x) {
+        const v_float16 _vexp_lo_f16 = vx_setall_f16(-10.7421875f);
+        const v_float16 _vexp_hi_f16 = vx_setall_f16(11.f);
+        const v_float16 _vexp_half_fp16 = vx_setall_f16(0.5f);
+        const v_float16 _vexp_one_fp16 = vx_setall_f16(1.f);
+        const v_float16 _vexp_LOG2EF_f16 = vx_setall_f16(1.44269504088896341f);
+        const v_float16 _vexp_C1_f16 = vx_setall_f16(-6.93359375E-1f);
+        const v_float16 _vexp_C2_f16 = vx_setall_f16(2.12194440E-4f);
+        const v_float16 _vexp_p0_f16 = vx_setall_f16(1.9875691500E-4f);
+        const v_float16 _vexp_p1_f16 = vx_setall_f16(1.3981999507E-3f);
+        const v_float16 _vexp_p2_f16 = vx_setall_f16(8.3334519073E-3f);
+        const v_float16 _vexp_p3_f16 = vx_setall_f16(4.1665795894E-2f);
+        const v_float16 _vexp_p4_f16 = vx_setall_f16(1.6666665459E-1f);
+        const v_float16 _vexp_p5_f16 = vx_setall_f16(5.0000001201E-1f);
+        const v_int16 _vexp_bias_s16 = vx_setall_s16(0xf);
+
+        v_float16 _vexp_, _vexp_x, _vexp_y, _vexp_xx;
+        v_int16 _vexp_mm;
+
+        // compute exponential of x
+        _vexp_x = v_max(x, _vexp_lo_f16);
+        _vexp_x = v_min(_vexp_x, _vexp_hi_f16);
+
+        _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f16, _vexp_half_fp16);
+        _vexp_mm = v_floor(_vexp_);
+        _vexp_ = v_cvt_f16(_vexp_mm);
+        _vexp_mm = v_add(_vexp_mm, _vexp_bias_s16);
+        _vexp_mm = v_shl(_vexp_mm, 10);
+
+        _vexp_x = v_fma(_vexp_, _vexp_C1_f16, _vexp_x);
+        _vexp_x = v_fma(_vexp_, _vexp_C2_f16, _vexp_x);
+        _vexp_xx = v_mul(_vexp_x, _vexp_x);
+
+        _vexp_y = v_fma(_vexp_x, _vexp_p0_f16, _vexp_p1_f16);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f16);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f16);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f16);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f16);
+
+        _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x);
+        _vexp_y = v_add(_vexp_y, _vexp_one_fp16);
+        _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f16(_vexp_mm));
+
+        // exp(NAN) -> NAN
+        v_float16 mask_not_nan = v_not_nan(x);
+        return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f16(vx_setall_s16(0x7e00)));
+    }
+#endif
+
+    inline v_float32 v_exp(const v_float32 &x) {
+        const v_float32 _vexp_lo_f32 = vx_setall_f32(-88.3762626647949f);
+        const v_float32 _vexp_hi_f32 = vx_setall_f32(89.f);
+        const v_float32 _vexp_half_fp32 = vx_setall_f32(0.5f);
+        const v_float32 _vexp_one_fp32 = vx_setall_f32(1.f);
+        const v_float32 _vexp_LOG2EF_f32 = vx_setall_f32(1.44269504088896341f);
+        const v_float32 _vexp_C1_f32 = vx_setall_f32(-6.93359375E-1f);
+        const v_float32 _vexp_C2_f32 = vx_setall_f32(2.12194440E-4f);
+        const v_float32 _vexp_p0_f32 = vx_setall_f32(1.9875691500E-4f);
+        const v_float32 _vexp_p1_f32 = vx_setall_f32(1.3981999507E-3f);
+        const v_float32 _vexp_p2_f32 = vx_setall_f32(8.3334519073E-3f);
+        const v_float32 _vexp_p3_f32 = vx_setall_f32(4.1665795894E-2f);
+        const v_float32 _vexp_p4_f32 = vx_setall_f32(1.6666665459E-1f);
+        const v_float32 _vexp_p5_f32 = vx_setall_f32(5.0000001201E-1f);
+        const v_int32 _vexp_bias_s32 = vx_setall_s32(0x7f);
+
+        v_float32 _vexp_, _vexp_x, _vexp_y, _vexp_xx;
+        v_int32 _vexp_mm;
+
+        // compute exponential of x
+        _vexp_x = v_max(x, _vexp_lo_f32);
+        _vexp_x = v_min(_vexp_x, _vexp_hi_f32);
+
+        _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f32, _vexp_half_fp32);
+        _vexp_mm = v_floor(_vexp_);
+        _vexp_ = v_cvt_f32(_vexp_mm);
+        _vexp_mm = v_add(_vexp_mm, _vexp_bias_s32);
+        _vexp_mm = v_shl(_vexp_mm, 23);
+
+        _vexp_x = v_fma(_vexp_, _vexp_C1_f32, _vexp_x);
+        _vexp_x = v_fma(_vexp_, _vexp_C2_f32, _vexp_x);
+        _vexp_xx = v_mul(_vexp_x, _vexp_x);
+
+        _vexp_y = v_fma(_vexp_x, _vexp_p0_f32, _vexp_p1_f32);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f32);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f32);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f32);
+        _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f32);
+
+        _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x);
+        _vexp_y = v_add(_vexp_y, _vexp_one_fp32);
+        _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm));
+
+        // exp(NAN) -> NAN
+        v_float32 mask_not_nan = v_not_nan(x);
+        return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f32(vx_setall_s32(0x7fc00000)));
+    }
+
+#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
+    inline v_float64 v_exp(const v_float64 &x) {
+        const v_float64 _vexp_lo_f64 = vx_setall_f64(-709.43613930310391424428);
+        const v_float64 _vexp_hi_f64 = vx_setall_f64(710.);
+        const v_float64 _vexp_half_f64 = vx_setall_f64(0.5);
+        const v_float64 _vexp_one_f64 = vx_setall_f64(1.0);
+        const v_float64 _vexp_two_f64 = vx_setall_f64(2.0);
+        const v_float64 _vexp_LOG2EF_f64 = vx_setall_f64(1.44269504088896340736);
+        const v_float64 _vexp_C1_f64 = vx_setall_f64(-6.93145751953125E-1);
+        const v_float64 _vexp_C2_f64 = vx_setall_f64(-1.42860682030941723212E-6);
+        const v_float64 _vexp_p0_f64 = vx_setall_f64(1.26177193074810590878E-4);
+        const v_float64 _vexp_p1_f64 = vx_setall_f64(3.02994407707441961300E-2);
+        const v_float64 _vexp_p2_f64 = vx_setall_f64(9.99999999999999999910E-1);
+        const v_float64 _vexp_q0_f64 = vx_setall_f64(3.00198505138664455042E-6);
+        const v_float64 _vexp_q1_f64 = vx_setall_f64(2.52448340349684104192E-3);
+        const v_float64 _vexp_q2_f64 = vx_setall_f64(2.27265548208155028766E-1);
+        const v_float64 _vexp_q3_f64 = vx_setall_f64(2.00000000000000000009E0);
+        const v_int64 _vexp_bias_s64 = vx_setall_s64(0x3ff);
+
+        v_float64 _vexp_, _vexp_x, _vexp_y, _vexp_z, _vexp_xx;
+        v_int64 _vexp_mm;
+
+        // compute exponential of x
+        _vexp_x = v_max(x, _vexp_lo_f64);
+        _vexp_x = v_min(_vexp_x, _vexp_hi_f64);
+
+        _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f64, _vexp_half_f64);
+        _vexp_mm = v_expand_low(v_floor(_vexp_));
+        _vexp_ = v_cvt_f64(_vexp_mm);
+        _vexp_mm = v_add(_vexp_mm, _vexp_bias_s64);
+        _vexp_mm = v_shl(_vexp_mm, 52);
+
+        _vexp_x = v_fma(_vexp_, _vexp_C1_f64, _vexp_x);
+        _vexp_x = v_fma(_vexp_, _vexp_C2_f64, _vexp_x);
+        _vexp_xx = v_mul(_vexp_x, _vexp_x);
+
+        _vexp_y = v_fma(_vexp_xx, _vexp_p0_f64, _vexp_p1_f64);
+        _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_p2_f64);
+        _vexp_y = v_mul(_vexp_y, _vexp_x);
+
+        _vexp_z = v_fma(_vexp_xx, _vexp_q0_f64, _vexp_q1_f64);
+        _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q2_f64);
+        _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q3_f64);
+
+        _vexp_z = v_div(_vexp_y, v_sub(_vexp_z, _vexp_y));
+        _vexp_z = v_fma(_vexp_two_f64, _vexp_z, _vexp_one_f64);
+        _vexp_z = v_mul(_vexp_z, v_reinterpret_as_f64(_vexp_mm));
+
+        // exp(NAN) -> NAN
+        v_float64 mask_not_nan = v_not_nan(x);
+        return v_select(mask_not_nan, _vexp_z, v_reinterpret_as_f64(vx_setall_s64(0x7FF8000000000000)));
+    }
+#endif
+
+#define OPENCV_HAL_MATH_HAVE_EXP 1
+//! @}
+
+#endif
+}
+#endif  // OPENCV_HAL_INTRIN_HPP
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 08138d194d..6e55a6ddd3 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1698,6 +1698,103 @@ template<typename R> struct TheTest
         return *this;
     }
 
+    void __test_exp(LaneType dataMax, LaneType diff_thr, LaneType enlarge_factor, LaneType flt_min) {
+        int n = VTraits<R>::vlanes();
+
+        // Test overflow and underflow values with step
+        const LaneType step = (LaneType) 0.01;
+        for (LaneType i = dataMax + 1; i <= dataMax + 11;) {
+            Data<R> dataUpperBound, dataLowerBound, resOverflow, resUnderflow;
+            for (int j = 0; j < n; ++j) {
+                dataUpperBound[j] = i;
+                dataLowerBound[j] = -i;
+                i += step;
+            }
+            R upperBound = dataUpperBound, lowerBound = dataLowerBound;
+            resOverflow = v_exp(upperBound);
+            resUnderflow = v_exp(lowerBound);
+            for (int j = 0; j < n; ++j) {
+                SCOPED_TRACE(cv::format("Overflow/Underflow test value: %f", i));
+                EXPECT_TRUE(resOverflow[j] > 0 && std::isinf(resOverflow[j]));
+                EXPECT_GE(resUnderflow[j], 0);
+                EXPECT_LT(resUnderflow[j], flt_min);
+            }
+        }
+
+        // Test random values combined with special values
+        std::vector<LaneType> specialValues = {0, 1, INFINITY, -INFINITY, NAN, dataMax};
+        const int testRandNum = 10000;
+        const double specialValueProbability = 0.1; // 10% chance to insert a special value
+        cv::RNG_MT19937 rng;
+
+        for (int i = 0; i < testRandNum; i++) {
+            Data<R> dataRand, resRand;
+            for (int j = 0; j < n; ++j) {
+                if (rng.uniform(0.f, 1.f) <= specialValueProbability) {
+                    // Insert a special value
+                    int specialValueIndex = rng.uniform(0, (int) specialValues.size());
+                    dataRand[j] = specialValues[specialValueIndex];
+                } else {
+                    // Generate random data in [-dataMax*1.1, dataMax*1.1]
+                    dataRand[j] = (LaneType) rng.uniform(-dataMax * 1.1, dataMax * 1.1);
+                }
+            }
+            // Compare with std::exp
+            R x = dataRand;
+            resRand = v_exp(x);
+            for (int j = 0; j < n; ++j) {
+                SCOPED_TRACE(cv::format("Random test value: %f", dataRand[j]));
+                LaneType std_exp = std::exp(dataRand[j]);
+                if (dataRand[j] == 0) {
+                    // input 0 -> output 1
+                    EXPECT_EQ(resRand[j], 1);
+                } else if (dataRand[j] == 1) {
+                    // input 1 -> output e
+                    EXPECT_NEAR((LaneType) M_E, resRand[j], 1e-15);
+                } else if (dataRand[j] > 0 && std::isinf(dataRand[j])) {
+                    // input INF -> output INF
+                    EXPECT_TRUE(resRand[j] > 0 && std::isinf(resRand[j]));
+                } else if (dataRand[j] < 0 && std::isinf(dataRand[j])) {
+                    // input -INF -> output 0
+                    EXPECT_EQ(resRand[j], 0);
+                } else if (std::isnan(dataRand[j])) {
+                    // input NaN -> output NaN
+                    EXPECT_TRUE(std::isnan(resRand[j]));
+                } else if (dataRand[j] == dataMax) {
+                    // input dataMax -> output less than INFINITY
+                    EXPECT_LT(resRand[j], (LaneType) INFINITY);
+                } else if (std::isinf(resRand[j])) {
+                    // output INF -> input close to edge
+                    EXPECT_GT(dataRand[j], dataMax);
+                } else {
+                    EXPECT_GE(resRand[j], 0);
+                    EXPECT_LT(std::abs(resRand[j] - std_exp), diff_thr * (std_exp + flt_min * enlarge_factor));
+                }
+            }
+        }
+    }
+
+    TheTest &test_exp_fp16() {
+#if CV_SIMD_FP16
+        float16_t flt16_min;
+        uint16_t flt16_min_hex = 0x0400;
+        std::memcpy(&flt16_min, &flt16_min_hex, sizeof(float16_t));
+        __test_exp((float16_t) 10, (float16_t) 1e-2, (float16_t) 1e2, flt16_min);
+#endif
+        return *this;
+    }
+
+    TheTest &test_exp_fp32() {
+        __test_exp(88.0f, 1e-6f, 1e6f, FLT_MIN);
+        return *this;
+    }
+
+    TheTest &test_exp_fp64() {
+#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
+        __test_exp(709.0, 1e-15, 1e15, DBL_MIN);
+#endif
+        return *this;
+    }
 };
 
 #define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits<v_uint8>::vlanes(), CV__TRACE_FUNCTION);
@@ -2011,6 +2108,7 @@ void test_hal_intrin_float32()
         .test_extract_highest()
         .test_broadcast_highest()
         .test_pack_triplets()
+        .test_exp_fp32()
 #if CV_SIMD_WIDTH == 32
         .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
         .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
@@ -2035,13 +2133,13 @@ void test_hal_intrin_float64()
         .test_mask()
         .test_unpack()
         .test_float_math()
-        .test_round_pair_f64()
         .test_float_cvt32()
         .test_reverse()
         .test_extract<0>().test_extract<1>()
         .test_rotate<0>().test_rotate<1>()
         .test_extract_n<0>().test_extract_n<1>()
         .test_extract_highest()
+        .test_exp_fp64()
         //.test_broadcast_element<0>().test_broadcast_element<1>()
 #if CV_SIMD_WIDTH == 32
         .test_extract<2>().test_extract<3>()
@@ -2062,6 +2160,7 @@ void test_hal_intrin_float16()
 #if CV_SIMD_FP16
         .test_loadstore_fp16()
         .test_float_cvt_fp16()
+        .test_exp_fp16()
 #endif
         ;
 #else
diff --git a/modules/dnn/src/layers/cpu_kernels/softmax.cpp b/modules/dnn/src/layers/cpu_kernels/softmax.cpp
index eb258ecfa2..fd55c1c1de 100644
--- a/modules/dnn/src/layers/cpu_kernels/softmax.cpp
+++ b/modules/dnn/src/layers/cpu_kernels/softmax.cpp
@@ -71,48 +71,12 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){
             // calculate the exp value along the axis
             v_float32 vs = vx_setzero_f32();
             vmax = vx_setall_f32(maxVal);
-            // initialize vexp constant
-            v_float32 _vexp_lo = vx_setall_f32(-88.3762626647949f);
-            v_float32 _vexp_hi = vx_setall_f32(88.3762626647949f);
-            v_float32 _vexp_half = vx_setall_f32(0.5f);
-            v_float32 _vexp_one = vx_setall_f32(1.f);
-            v_float32 _vexp_LOG2EF = vx_setall_f32(1.44269504088896341f);
-            v_float32 _vexp_C1 = vx_setall_f32(-0.693359375f);
-            v_float32 _vexp_C2 = vx_setall_f32(2.12194440e-4f);
-            v_float32 _vexp_p0 = vx_setall_f32(1.9875691500E-4f);
-            v_float32 _vexp_p1 = vx_setall_f32(1.3981999507E-3f);
-            v_float32 _vexp_p2 = vx_setall_f32(8.3334519073E-3f);
-            v_float32 _vexp_p3 = vx_setall_f32(4.1665795894E-2f);
-            v_float32 _vexp_p4 = vx_setall_f32(1.6666665459E-1f);
-            v_float32 _vexp_p5 = vx_setall_f32(5.0000001201E-1f);
-            // initialize temp vectors for vexp
-            v_float32 val, _vexp_, _vexp_x, _vexp_y, _vexp_z;
-            v_int32 _vexp_mm;
-
+            v_float32 val;
             // calculate and sum all data along axis
             for (size_t cnDim = 0; cnDim < axisStep; cnDim += nlanes) {
                 val = vx_load(axisBuf + cnDim);
                 val = v_sub(val, vmax);
-
-                // compute vexp of val
-                _vexp_x = v_min(val, _vexp_hi);
-                _vexp_x = v_max(_vexp_x, _vexp_lo);
-                _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF, _vexp_half);
-                _vexp_mm = v_floor(_vexp_);
-                _vexp_ = v_cvt_f32(_vexp_mm);
-                _vexp_mm = v_add(_vexp_mm, vx_setall_s32(0x7f));
-                _vexp_mm = v_shl(_vexp_mm, 23);
-                _vexp_x = v_fma(_vexp_, _vexp_C1, _vexp_x);
-                _vexp_x = v_fma(_vexp_, _vexp_C2, _vexp_x);
-                _vexp_z = v_mul(_vexp_x, _vexp_x);
-                _vexp_y = v_fma(_vexp_x, _vexp_p0, _vexp_p1);
-                _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2);
-                _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3);
-                _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4);
-                _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5);
-                _vexp_y = v_fma(_vexp_y, _vexp_z, _vexp_x);
-                _vexp_y = v_add(_vexp_y, _vexp_one);
-                val = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm));
+                val = v_exp(val);
 
                 vs = v_add(vs, val);
                 v_store(axisBuf + cnDim, val);

From 2799c74d50372bdf1b7a5258e5f53c45edbd662a Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Thu, 20 Jun 2024 17:22:36 +0300
Subject: [PATCH 06/39] Use Carotene implementation of
 TEGRA_GaussianBlurBinomial 3x3 and 5x5 on ARM.

---
 3rdparty/carotene/hal/tegra_hal.hpp | 75 ++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp
index ce8fa90982..7f67ecf1a3 100644
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@@ -1286,7 +1286,6 @@ inline int TEGRA_SEPFILTERFREE(cvhalFilter2D *context)
 #undef cv_hal_sepFilterFree
 #define cv_hal_sepFilterFree TEGRA_SEPFILTERFREE
 
-
 struct MorphCtx
 {
     int operation;
@@ -1857,6 +1856,80 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
 #define cv_hal_cvtTwoPlaneYUVtoBGREx TEGRA_CVT2PYUVTOBGR_EX
 #endif
 
+// The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
+#if defined(DCAROTENE_NEON_ARCH) && (DCAROTENE_NEON_ARCH == 7)
+inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
+{
+    switch(borderType)
+    {
+    case CV_HAL_BORDER_CONSTANT:
+        return CAROTENE_NS::BORDER_MODE_CONSTANT;
+    case CV_HAL_BORDER_REPLICATE:
+        return CAROTENE_NS::BORDER_MODE_REPLICATE;
+    case CV_HAL_BORDER_REFLECT:
+        return CAROTENE_NS::BORDER_MODE_REFLECT;
+    case CV_HAL_BORDER_WRAP:
+        return CAROTENE_NS::BORDER_MODE_WRAP;
+    case CV_HAL_BORDER_REFLECT_101:
+        return CAROTENE_NS::BORDER_MODE_REFLECT101;
+    }
+
+    return CAROTENE_NS::BORDER_MODE_UNDEFINED;
+}
+
+inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
+                         int width, int height, int depth, int cn, size_t margin_left, size_t margin_top,
+                         size_t margin_right, size_t margin_bottom, size_t ksize, int border_type)
+{
+    CAROTENE_NS::Size2D sz(width, height);
+    CAROTENE_NS::BORDER_MODE border = borderCV2Carotene(border_type);
+    CAROTENE_NS::Margin mg(margin_left, margin_right, margin_top, margin_bottom);
+
+    if (ksize == 3)
+    {
+        if ((depth != CV_8U) || (cn != 1))
+            return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+        if (CAROTENE_NS::isGaussianBlur3x3MarginSupported(sz, border, mg))
+        {
+            CAROTENE_NS::gaussianBlur3x3Margin(sz, src_data, src_step, dst_data, dst_step,
+                                  border, 0, mg);
+            return CV_HAL_ERROR_OK;
+        }
+    }
+    else if (ksize == 5)
+    {
+        if (!CAROTENE_NS::isGaussianBlur5x5Supported(sz, cn, border))
+            return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+        if (depth == CV_8U)
+        {
+            CAROTENE_NS::gaussianBlur5x5(sz, cn, (uint8_t*)src_data, src_step,
+                                         (uint8_t*)dst_data, dst_step, border, 0, mg);
+            return CV_HAL_ERROR_OK;
+        }
+        else if (depth == CV_16U)
+        {
+            CAROTENE_NS::gaussianBlur5x5(sz, cn, (uint16_t*)src_data, src_step,
+                                         (uint16_t*)dst_data, dst_step, border, 0, mg);
+            return CV_HAL_ERROR_OK;
+        }
+        else if (depth == CV_16S)
+        {
+            CAROTENE_NS::gaussianBlur5x5(sz, cn, (int16_t*)src_data, src_step,
+                                         (int16_t*)dst_data, dst_step, border, 0, mg);
+           return CV_HAL_ERROR_OK;
+        }
+    }
+
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+#undef cv_hal_gaussianBlurBinomial
+#define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial
+
+#endif // DCAROTENE_NEON_ARCH=7
+
 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H
 
 #endif

From a8d13739196e54d14d423ed9f94dbb4bba8adf2b Mon Sep 17 00:00:00 2001
From: Abduragim Shtanchaev <44877829+Abdurrahheem@users.noreply.github.com>
Date: Tue, 2 Jul 2024 18:26:34 +0300
Subject: [PATCH 07/39] Merge pull request #25794 from
 Abdurrahheem:ash/yolov10-support

Add sample support of YOLOv9 and YOLOv10 in OpenCV #25794

This PR adds sample support of  [`YOLOv9`](https://github.com/WongKinYiu/yolov9) and [`YOLOv10`](https://github.com/THU-MIG/yolov10/tree/main)) in OpenCV. Models for this test are located in this [PR](https://github.com/opencv/opencv_extra/pull/1186).

**Running YOLOv10 using OpenCV.**
1. In oder to run `YOLOv10` one needs to cut off postporcessing with dynamic shapes from torch and then convert it to ONNX. If someone is looking for ready solution, there is [this forked branch](https://github.com/Abdurrahheem/yolov10/tree/ash/opencv-export) from official YOLOv10.  Particularty follow this proceduce.

```bash
git clone git@github.com:Abdurrahheem/yolov10.git
conda create -n yolov10 python=3.9
conda activate yolov10
pip install -r requirements.txt
python export_opencv.py --model=<model-name> --imgsz=<input-img-size>
```
By default `model="yolov10s"` and `imgsz=(480,640)`. This will generate file `yolov10s.onnx`, which can be use for inference in OpenCV

2. For inference part on OpenCV.  one can use `yolo_detector.cpp` [sample](https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp). If you have followed above exporting procedure, then you can use following command to run the model.

``` bash
build opencv from source
cd build
./bin/example_dnn_yolo_detector --model=<path-to-yolov10s.onnx-file> --yolo=yolov10 --width=640 --height=480 --input=<path-to-image> --scale=0.003921568627 --padvalue=114
```
If you do not specify `--input` argument, OpenCV will grab first camera that is avaliable on your platform.
For more deatils on how to run the `yolo_detector.cpp` file see this [guide](https://docs.opencv.org/4.x/da/d9d/tutorial_dnn_yolo.html#autotoc_md443)


**Running YOLOv9 using OpenCV**

1. Export model following [official guide](https://github.com/WongKinYiu/yolov9)of the YOLOv9 repository. Particularly you can do following for converting.

```bash
git clone https://github.com/WongKinYiu/yolov9.git
cd yolov9
conda create -n yolov9 python=3.9
conda activate yolov9
pip install -r requirements.txt
wget https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-t-converted.pt
python export.py --weights=./yolov9-t-converted.pt --include=onnx --img-size=(480,640)
```

This will generate <yolov9-t-converted.onnx> file.

2.  Inference on OpenCV.

```bash
build opencv from source
cd build
./bin/example_dnn_yolo_detector --model=<path-to-yolov9-t-converted.onnx> --yolo=yolov9 --width=640 --height=480 --scale=0.003921568627 --padvalue=114 --path=<path-to-image>
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown | 51 +++++++++--
 modules/dnn/test/test_onnx_importer.cpp      | 96 ++++++++++++++++++--
 samples/dnn/yolo_detector.cpp                | 32 +++++--
 3 files changed, 151 insertions(+), 28 deletions(-)

diff --git a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
index a2d4b2a306..ce95234f88 100644
--- a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
+++ b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@@ -24,7 +24,9 @@ model, but the methodology applies to other supported models.
 
 @note Currently, OpenCV supports the following YOLO models:
 - [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX/blob/main),
-- [YoloNas](https://github.com/Deci-AI/super-gradients/tree/master),
+- [YOLONas](https://github.com/Deci-AI/super-gradients/tree/master),
+- [YOLOv10](https://github.com/THU-MIG/yolov10/tree/main),
+- [YOLOv9](https://github.com/WongKinYiu/yolov9),
 - [YOLOv8](https://github.com/ultralytics/ultralytics/tree/main),
 - [YOLOv7](https://github.com/WongKinYiu/yolov7/tree/main),
 - [YOLOv6](https://github.com/meituan/YOLOv6/blob/main),
@@ -79,7 +81,7 @@ the ONNX graph, a process that we will detail further in the subsequent sections
 
 Now that we know know the parameters of the pre-precessing we can go on and export the model from
 Pytorch to ONNX graph. Since in this tutorial we are using YOLOX as our sample model, lets use its
-export for demonstration purposes (the process is  identical for the rest of the YOLO detectors).
+export for demonstration purposes (the process is  identical for the rest of the YOLO detectors except `YOLOv10` model, see details on how to export it later in the post).
 To exporting YOLOX we can just use [export script](https://github.com/Megvii-BaseDetection/YOLOX/blob/ac58e0a5e68e57454b7b9ac822aced493b553c53/tools/export_onnx.py). Particularly we need following commands:
 
 @code{.bash}
@@ -125,6 +127,20 @@ than YOLOX) in case it is needed. However, usually each YOLO repository has pred
     onnx.save(model_simp, args.output_name)
 @endcode
 
+#### Exporting YOLOv10 model
+
+In oder to run YOLOv10 one needs to cut off postporcessing with dynamic shapes from torch and then convert it to ONNX. If someone is looking for on how to cut off the postprocessing, there is this [forked branch](https://github.com/Abdurrahheem/yolov10/tree/ash/opencv-export) from official YOLOv10. The forked branch cuts of the postprocessing by [returning output](https://github.com/Abdurrahheem/yolov10/blob/4fdaafd912c8891642bfbe85751ea66ec20f05ad/ultralytics/nn/modules/head.py#L522) of the model before postprocessing procedure itself. To convert torch model to ONNX follow this proceduce.
+
+@code{.bash}
+git clone git@github.com:Abdurrahheem/yolov10.git
+conda create -n yolov10 python=3.9
+conda activate yolov10
+pip install -r requirements.txt
+python export_opencv.py --model=<model-name> --imgsz=<input-img-size>
+@endcode
+
+By default `--model="yolov10s"` and `--imgsz=(480,640)`. This will generate file `yolov10s.onnx`, which can be use for inference in OpenCV
+
 ### Running Yolo ONNX detector with OpenCV Sample
 
 Once we have our ONNX graph of the model, we just simply can run with OpenCV's sample. To that we need to make sure:
@@ -144,24 +160,25 @@ Once we have our ONNX graph of the model, we just simply can run with OpenCV's s
                                 --padvalue=<padding_value> \
                                 --paddingmode=<padding_mode> \
                                 --backend=<computation_backend> \
-                                --target=<target_computation_device>
+                                --target=<target_computation_device> \
+                                --width=<model_input_width> \
+                                --height=<model_input_height> \
 @endcode
 
-VIDEO DEMO:
-@youtube{NHtRlndE2cg}
-
 - --input: File path to your input image or video. If omitted, it will capture frames from a camera.
 - --classes: File path to a text file containing class names for object detection.
 - --thr: Confidence threshold for detection (e.g., 0.5).
 - --nms: Non-maximum suppression threshold (e.g., 0.4).
 - --mean: Mean normalization value (e.g., 0.0 for no mean normalization).
-- --scale: Scale factor for input normalization (e.g., 1.0).
+- --scale: Scale factor for input normalization (e.g., 1.0, 1/255.0, etc).
 - --yolo: YOLO model version (e.g., YOLOv3, YOLOv4, etc.).
 - --padvalue: Padding value used in pre-processing (e.g., 114.0).
 - --paddingmode: Method for handling image resizing and padding. Options: 0 (resize without extra processing), 1 (crop after resize), 2 (resize with aspect ratio preservation).
 - --backend: Selection of computation backend (0 for automatic, 1 for Halide, 2 for OpenVINO, etc.).
 - --target: Selection of target computation device (0 for CPU, 1 for OpenCL, etc.).
 - --device: Camera device number (0 for default camera). If `--input` is not provided camera with index 0 will used by default.
+- --width: Model input width. Not to be confused with the image width. (e.g., 416, 480, 640, 1280, etc).
+- --height: Model input height. Not to be confused with the image height. (e.g., 416, 480, 640, 1280, etc).
 
 Here `mean`, `scale`, `padvalue`, `paddingmode` should exactly match those that we discussed
 in pre-processing section in order for the model to match result in PyTorch
@@ -183,7 +200,8 @@ cd <build directory of OpenCV>
 ./bin/example_dnn_yolo_detector
 @endcode
 
-This will execute the YOLOX detector with your camera. For YOLOv8 (for instance), follow these additional steps:
+This will execute the YOLOX detector with your camera.
+For YOLOv8 (for instance), follow these additional steps:
 
 @code{.sh}
 cd opencv_extra/testdata/dnn
@@ -195,6 +213,23 @@ cd <build directory of OpenCV>
 ./bin/example_dnn_yolo_detector --model=onnx/models/yolov8n.onnx --yolo=yolov8 --mean=0.0 --scale=0.003921568627 --paddingmode=2 --padvalue=144.0 --thr=0.5 --nms=0.4 --rgb=0
 @endcode
 
+For YOLOv10, follow these steps:
+
+@code{.sh}
+cd opencv_extra/testdata/dnn
+python download_models.py yolov10
+cd ..
+export OPENCV_TEST_DATA_PATH=$(pwd)
+cd <build directory of OpenCV>
+
+./bin/example_dnn_yolo_detector --model=onnx/models/yolov8n.onnx --yolo=yolov10 --width=640 --height=480  --scale=0.003921568627 --padvalue=114
+@endcode
+
+This will run `YOLOv10` detector on first camera found on your system. If you want to run it on a image/video file, you can use `--input` option to specify the path to the file.
+
+
+VIDEO DEMO:
+@youtube{NHtRlndE2cg}
 
 ### Building a Custom Pipeline
 
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 82b10fb1ba..35ac0be56a 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -19,7 +19,8 @@ void yoloPostProcessing(
     std::vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const std::string& test_name);
+    const std::string& model_name,
+    const int nc=80);
 
 template<typename TString>
 static std::string _tf(TString filename, bool required = true)
@@ -2670,7 +2671,8 @@ void yoloPostProcessing(
     std::vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const std::string& test_name
+    const std::string& model_name,
+    const int nc
 ){
 
     // Retrieve
@@ -2678,11 +2680,13 @@ void yoloPostProcessing(
     std::vector<float> confidences;
     std::vector<Rect2d> boxes;
 
-    if (test_name == "yolov8"){
+    if (model_name == "yolov8" || model_name == "yolov10" ||
+        model_name == "yolov9")
+    {
         cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
     }
 
-    if (test_name == "yolonas"){
+    if (model_name == "yolonas"){
         // outs contains 2 elemets of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
         Mat concat_out;
         // squeeze the first dimension
@@ -2696,22 +2700,27 @@ void yoloPostProcessing(
         outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, 84});
     }
 
+    // assert if last dim is 85 or 84
+    CV_CheckEQ(outs[0].dims, 3, "Invalid output shape. The shape should be [1, #anchors, 85 or 84]");
+    CV_CheckEQ((outs[0].size[2] == nc + 5 || outs[0].size[2] == 80 + 4), true, "Invalid output shape: ");
+
     for (auto preds : outs){
 
         preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
         for (int i = 0; i < preds.rows; ++i)
         {
             // filter out non object
-            float obj_conf = (test_name == "yolov8" || test_name == "yolonas") ? 1.0f : preds.at<float>(i, 4) ;
+            float obj_conf = (model_name == "yolov8" || model_name == "yolonas" ||
+                              model_name == "yolov9" || model_name == "yolov10") ? 1.0f : preds.at<float>(i, 4) ;
             if (obj_conf < conf_threshold)
                 continue;
 
-            Mat scores = preds.row(i).colRange((test_name == "yolov8" || test_name == "yolonas") ? 4 : 5, preds.cols);
+            Mat scores = preds.row(i).colRange((model_name == "yolov8" || model_name == "yolonas" || model_name == "yolov9" || model_name == "yolov10") ? 4 : 5, preds.cols);
             double conf;
             Point maxLoc;
             minMaxLoc(scores, 0, &conf, 0, &maxLoc);
 
-            conf = (test_name == "yolov8" || test_name == "yolonas") ? conf : conf * obj_conf;
+            conf = (model_name == "yolov8" || model_name == "yolonas" || model_name == "yolov9" || model_name == "yolov10") ? conf : conf * obj_conf;
             if (conf < conf_threshold)
                 continue;
 
@@ -2722,15 +2731,14 @@ void yoloPostProcessing(
             double w = det[2];
             double h = det[3];
 
-            // std::cout << "cx: " << cx << " cy: " << cy << " w: " << w << " h: " << h << " conf: " << conf << " idx: " << maxLoc.x << std::endl;
             // [x1, y1, x2, y2]
-            if (test_name == "yolonas"){
+            if (model_name == "yolonas" || model_name == "yolov10"){
                 boxes.push_back(Rect2d(cx, cy, w, h));
             } else {
                 boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
                                         cx + 0.5 * w, cy + 0.5 * h));
             }
-           classIds.push_back(maxLoc.x);
+            classIds.push_back(maxLoc.x);
             confidences.push_back(conf);
         }
     }
@@ -2747,7 +2755,75 @@ void yoloPostProcessing(
     }
 }
 
+TEST_P(Test_ONNX_nets, YOLOv10)
+{
+
+    std::string weightPath = _tf("models/yolov10s.onnx", false);
+
+    Size targetSize{640, 480};
+    float conf_threshold = 0.50;
+    float iou_threshold = 0.50;
+
+    std::vector<int> refClassIds{1, 16, 7};
+    std::vector<float> refScores{0.9510f, 0.9454f, 0.8404f};
+
+    std::vector<Rect2d> refBoxes{
+        Rect2d(105.5014, 112.8838, 472.9274, 350.0603),
+        Rect2d(109.8231, 185.7994, 258.5916, 452.9302),
+        Rect2d(388.5018,  62.1034, 576.6399, 143.3986)
+        };
+
+    Image2BlobParams imgParams(
+        Scalar::all(1 / 255.0),
+        targetSize,
+        Scalar::all(0),
+        true,
+        CV_32F,
+        DNN_LAYOUT_NCHW,
+        DNN_PMODE_LETTERBOX,
+        Scalar::all(114)
+        );
 
+    testYOLO(
+        weightPath, refClassIds, refScores, refBoxes,
+        imgParams, conf_threshold, iou_threshold,
+        1.0e-4, 1.0e-4, "yolov10");
+}
+
+TEST_P(Test_ONNX_nets, YOLOv9)
+{
+
+    std::string weightPath = _tf("models/yolov9t.onnx", false);
+
+    Size targetSize{640, 480};
+    float conf_threshold = 0.50;
+    float iou_threshold = 0.50;
+
+    std::vector<int> refClassIds{1, 16, 2}; // wrong class mapping for yolov9
+    std::vector<float> refScores{0.959274f, 0.901125, 0.559396f};
+
+    std::vector<Rect2d> refBoxes{
+        Rect2d(106.255, 107.927, 472.497, 350.309),
+        Rect2d(108.633, 185.256, 259.287, 450.672),
+        Rect2d(390.701, 62.1454, 576.928, 141.795)
+        };
+
+    Image2BlobParams imgParams(
+        Scalar::all(1 / 255.0),
+        targetSize,
+        Scalar::all(0),
+        true,
+        CV_32F,
+        DNN_LAYOUT_NCHW,
+        DNN_PMODE_LETTERBOX,
+        Scalar::all(114)
+        );
+
+    testYOLO(
+        weightPath, refClassIds, refScores, refBoxes,
+        imgParams, conf_threshold, iou_threshold,
+        1.0e-4, 1.0e-4, "yolov9");
+}
 TEST_P(Test_ONNX_nets, YOLOX)
 {
     applyTestTag(CV_TEST_TAG_DEBUG_VERYLONG);
diff --git a/samples/dnn/yolo_detector.cpp b/samples/dnn/yolo_detector.cpp
index b439b0d4bc..bd82acff4a 100644
--- a/samples/dnn/yolo_detector.cpp
+++ b/samples/dnn/yolo_detector.cpp
@@ -27,7 +27,8 @@ void yoloPostProcessing(
     std::vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const std::string& test_name
+    const std::string& model_name,
+    const int nc
 );
 
 std::vector<std::string> classes;
@@ -40,6 +41,7 @@ std::string keys =
     "{ yolo        | yolox | yolo model version. }"
     "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
     "{ classes     | | Optional path to a text file with names of classes to label detected objects. }"
+    "{ nc          | 80 | Number of classes. Default is 80 (coming from COCO dataset). }"
     "{ thr         | .5 | Confidence threshold. }"
     "{ nms         | .4 | Non-maximum suppression threshold. }"
     "{ mean        | 0.0 | Normalization constant. }"
@@ -107,19 +109,21 @@ void yoloPostProcessing(
     std::vector<Rect2d>& keep_boxes,
     float conf_threshold,
     float iou_threshold,
-    const std::string& test_name)
+    const std::string& model_name,
+    const int nc=80)
 {
     // Retrieve
     std::vector<int> classIds;
     std::vector<float> confidences;
     std::vector<Rect2d> boxes;
 
-    if (test_name == "yolov8")
+    if (model_name == "yolov8" || model_name == "yolov10" ||
+        model_name == "yolov9")
     {
         cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
     }
 
-    if (test_name == "yolonas")
+    if (model_name == "yolonas")
     {
         // outs contains 2 elemets of shape [1, 8400, 80] and [1, 8400, 4]. Concat them to get [1, 8400, 84]
         Mat concat_out;
@@ -131,25 +135,30 @@ void yoloPostProcessing(
         // remove the second element
         outs.pop_back();
         // unsqueeze the first dimension
-        outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, 84});
+        outs[0] = outs[0].reshape(0, std::vector<int>{1, 8400, nc + 4});
     }
 
+    // assert if last dim is 85 or 84
+    CV_CheckEQ(outs[0].dims, 3, "Invalid output shape. The shape should be [1, #anchors, 85 or 84]");
+    CV_CheckEQ((outs[0].size[2] == nc + 5 || outs[0].size[2] == 80 + 4), true, "Invalid output shape: ");
+
     for (auto preds : outs)
     {
         preds = preds.reshape(1, preds.size[1]); // [1, 8400, 85] -> [8400, 85]
         for (int i = 0; i < preds.rows; ++i)
         {
             // filter out non object
-            float obj_conf = (test_name == "yolov8" || test_name == "yolonas") ? 1.0f : preds.at<float>(i, 4) ;
+            float obj_conf = (model_name == "yolov8" || model_name == "yolonas" ||
+                              model_name == "yolov9" || model_name == "yolov10") ? 1.0f : preds.at<float>(i, 4) ;
             if (obj_conf < conf_threshold)
                 continue;
 
-            Mat scores = preds.row(i).colRange((test_name == "yolov8" || test_name == "yolonas") ? 4 : 5, preds.cols);
+            Mat scores = preds.row(i).colRange((model_name == "yolov8" || model_name == "yolonas" || model_name == "yolov9" || model_name == "yolov10") ? 4 : 5, preds.cols);
             double conf;
             Point maxLoc;
             minMaxLoc(scores, 0, &conf, 0, &maxLoc);
 
-            conf = (test_name == "yolov8" || test_name == "yolonas") ? conf : conf * obj_conf;
+            conf = (model_name == "yolov8" || model_name == "yolonas" || model_name == "yolov9" || model_name == "yolov10") ? conf : conf * obj_conf;
             if (conf < conf_threshold)
                 continue;
 
@@ -161,7 +170,7 @@ void yoloPostProcessing(
             double h = det[3];
 
             // [x1, y1, x2, y2]
-            if (test_name == "yolonas"){
+            if (model_name == "yolonas" || model_name == "yolov10"){
                 boxes.push_back(Rect2d(cx, cy, w, h));
             } else {
                 boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h,
@@ -203,6 +212,7 @@ int main(int argc, char** argv)
     // if model is default, use findFile to get the full path otherwise use the given path
     std::string weightPath = findFile(parser.get<String>("model"));
     std::string yolo_model = parser.get<String>("yolo");
+    int nc = parser.get<int>("nc");
 
     float confThreshold = parser.get<float>("thr");
     float nmsThreshold = parser.get<float>("nms");
@@ -219,6 +229,7 @@ int main(int argc, char** argv)
     // check if yolo model is valid
     if (yolo_model != "yolov5" && yolo_model != "yolov6"
         && yolo_model != "yolov7" && yolo_model != "yolov8"
+        && yolo_model != "yolov10" && yolo_model !="yolov9"
         && yolo_model != "yolox" && yolo_model != "yolonas")
         CV_Error(Error::StsError, "Invalid yolo model: " + yolo_model);
 
@@ -331,7 +342,8 @@ int main(int argc, char** argv)
         yoloPostProcessing(
             outs, keep_classIds, keep_confidences, keep_boxes,
             confThreshold, nmsThreshold,
-            yolo_model);
+            yolo_model,
+            nc);
         //![postprocess]
 
         // covert Rect2d to Rect

From a7fd9446cf5deddb9394cad35a60164c245f6b98 Mon Sep 17 00:00:00 2001
From: Yuantao Feng <yuantao.feng@opencv.org.cn>
Date: Wed, 3 Jul 2024 15:09:05 +0800
Subject: [PATCH 08/39] Merge pull request #25630 from
 fengyuentau:nary-multi-thread

dnn: parallelize nary elementwise forward implementation & enable related conformance tests #25630

This PR introduces the following changes:

- [x] Parallelize binary forward impl
- [x] Parallelize ternary forward impl (Where)
- [x] Parallelize nary (Operator that can take >=1 operands)
- [x] Enable conformance tests if workable

## Performance

### i7-12700K, RAM 64GB, Ubuntu 22.04

```
Geometric mean (ms)

                Name of Test                     opencv        opencv        opencv
                                                  perf          perf          perf
                                              core.x64.0606 core.x64.0606 core.x64.0606
                                                                               vs
                                                                             opencv
                                                                              perf
                                                                          core.x64.0606
                                                                           (x-factor)
NCHW_C_sum::Layer_NaryEltwise::OCV/CPU           16.116        11.161         1.44
NCHW_NCHW_add::Layer_NaryEltwise::OCV/CPU        17.469        11.446         1.53
NCHW_NCHW_div::Layer_NaryEltwise::OCV/CPU        17.531        11.469         1.53
NCHW_NCHW_equal::Layer_NaryEltwise::OCV/CPU      28.653        13.682         2.09
NCHW_NCHW_greater::Layer_NaryEltwise::OCV/CPU    21.899        13.422         1.63
NCHW_NCHW_less::Layer_NaryEltwise::OCV/CPU       21.738        13.185         1.65
NCHW_NCHW_max::Layer_NaryEltwise::OCV/CPU        16.172        11.473         1.41
NCHW_NCHW_mean::Layer_NaryEltwise::OCV/CPU       16.309        11.565         1.41
NCHW_NCHW_min::Layer_NaryEltwise::OCV/CPU        16.166        11.454         1.41
NCHW_NCHW_mul::Layer_NaryEltwise::OCV/CPU        16.157        11.443         1.41
NCHW_NCHW_pow::Layer_NaryEltwise::OCV/CPU        163.459       15.234         10.73
NCHW_NCHW_ref_div::Layer_NaryEltwise::OCV/CPU    10.880        10.868         1.00
NCHW_NCHW_ref_max::Layer_NaryEltwise::OCV/CPU    10.947        11.058         0.99
NCHW_NCHW_ref_min::Layer_NaryEltwise::OCV/CPU    10.948        10.910         1.00
NCHW_NCHW_ref_mul::Layer_NaryEltwise::OCV/CPU    10.874        10.871         1.00
NCHW_NCHW_ref_sum::Layer_NaryEltwise::OCV/CPU    10.971        10.920         1.00
NCHW_NCHW_sub::Layer_NaryEltwise::OCV/CPU        17.546        11.462         1.53
NCHW_NCHW_sum::Layer_NaryEltwise::OCV/CPU        16.175        11.475         1.41
NHWC_C::Layer_NaryEltwise::OCV/CPU               11.339        11.333         1.00
NHWC_H::Layer_NaryEltwise::OCV/CPU               16.154        11.102         1.46
```

### Apple M1, RAM 16GB, macOS 14.4.1

```
Geometric mean (ms)

                Name of Test                     opencv          opencv             opencv
                                                  perf            perf               perf
                                              core.m1.0606 core.m1.0606.patch core.m1.0606.patch
                                                                                      vs
                                                                                    opencv
                                                                                     perf
                                                                                 core.m1.0606
                                                                                  (x-factor)
NCHW_C_sum::Layer_NaryEltwise::OCV/CPU           28.418          3.768               7.54
NCHW_NCHW_add::Layer_NaryEltwise::OCV/CPU        6.942           5.679               1.22
NCHW_NCHW_div::Layer_NaryEltwise::OCV/CPU        5.822           5.653               1.03
NCHW_NCHW_equal::Layer_NaryEltwise::OCV/CPU      5.751           5.628               1.02
NCHW_NCHW_greater::Layer_NaryEltwise::OCV/CPU    5.797           5.599               1.04
NCHW_NCHW_less::Layer_NaryEltwise::OCV/CPU       7.272           5.578               1.30
NCHW_NCHW_max::Layer_NaryEltwise::OCV/CPU        5.777           5.562               1.04
NCHW_NCHW_mean::Layer_NaryEltwise::OCV/CPU       5.819           5.559               1.05
NCHW_NCHW_min::Layer_NaryEltwise::OCV/CPU        5.830           5.574               1.05
NCHW_NCHW_mul::Layer_NaryEltwise::OCV/CPU        5.759           5.567               1.03
NCHW_NCHW_pow::Layer_NaryEltwise::OCV/CPU       342.260          74.655              4.58
NCHW_NCHW_ref_div::Layer_NaryEltwise::OCV/CPU    8.338           8.280               1.01
NCHW_NCHW_ref_max::Layer_NaryEltwise::OCV/CPU    8.359           8.309               1.01
NCHW_NCHW_ref_min::Layer_NaryEltwise::OCV/CPU    8.412           8.295               1.01
NCHW_NCHW_ref_mul::Layer_NaryEltwise::OCV/CPU    8.380           8.297               1.01
NCHW_NCHW_ref_sum::Layer_NaryEltwise::OCV/CPU    8.356           8.323               1.00
NCHW_NCHW_sub::Layer_NaryEltwise::OCV/CPU        6.818           5.561               1.23
NCHW_NCHW_sum::Layer_NaryEltwise::OCV/CPU        5.805           5.570               1.04
NHWC_C::Layer_NaryEltwise::OCV/CPU               3.834           4.817               0.80
NHWC_H::Layer_NaryEltwise::OCV/CPU               28.402          3.771               7.53
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/src/cuda/eltwise_ops.cu           |   7 +
 modules/dnn/src/cuda/functors.hpp             |  15 +
 .../dnn/src/cuda4dnn/kernels/eltwise_ops.hpp  |   3 +
 .../dnn/src/cuda4dnn/primitives/eltwise.hpp   |  11 +-
 .../dnn/src/layers/nary_eltwise_layers.cpp    | 757 ++++++++++--------
 modules/dnn/src/onnx/onnx_importer.cpp        |  10 +-
 modules/dnn/test/test_onnx_conformance.cpp    |  15 +-
 ...rmance_layer_filter__cuda_denylist.inl.hpp |  12 -
 ...e_layer_filter__cuda_fp16_denylist.inl.hpp |  19 +
 ...conformance_layer_filter__openvino.inl.hpp |  14 +-
 ...ance_layer_filter__vulkan_denylist.inl.hpp |   3 +
 ...e_layer_filter_opencv_all_denylist.inl.hpp |   2 +-
 ...er_filter_opencv_ocl_fp16_denylist.inl.hpp |   1 +
 ..._conformance_layer_parser_denylist.inl.hpp |  98 +--
 14 files changed, 563 insertions(+), 404 deletions(-)
 create mode 100644 modules/dnn/test/test_onnx_conformance_layer_filter__cuda_fp16_denylist.inl.hpp

diff --git a/modules/dnn/src/cuda/eltwise_ops.cu b/modules/dnn/src/cuda/eltwise_ops.cu
index e2a7cc9a67..2949782138 100644
--- a/modules/dnn/src/cuda/eltwise_ops.cu
+++ b/modules/dnn/src/cuda/eltwise_ops.cu
@@ -350,6 +350,11 @@ void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
     eltwise_op<T, FModFunctor<T>>(stream, output, x, y);
 }
 
+template <class T>
+void eltwise_pow_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
+    eltwise_op<T, PowFunctor<T>>(stream, output, x, y);
+}
+
 #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
     template void eltwise_mod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
     template void eltwise_fmod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
@@ -360,6 +365,7 @@ void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
     template void eltwise_sum_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
     template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
     template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
+    template void eltwise_pow_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 #endif
     template void eltwise_mod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
     template void eltwise_fmod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
@@ -370,5 +376,6 @@ void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
     template void eltwise_sum_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
     template void eltwise_max_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
     template void eltwise_min_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
+    template void eltwise_pow_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
 
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp
index cada43387e..5aa271bdf4 100644
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@@ -833,6 +833,21 @@ struct FModFunctor {
     }
 };
 
+template <class T>
+struct PowFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() {}
+    };
+
+    CUDA4DNN_DEVICE PowFunctor() { }
+    CUDA4DNN_DEVICE PowFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T x, T y) {
+        using csl::device::pow;
+        return pow(x, y);
+    }
+};
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
diff --git a/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp b/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
index e80db943ae..452d23da64 100644
--- a/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
@@ -39,6 +39,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template <class T>
     void eltwise_fmod_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
 
+    template <class T>
+    void eltwise_pow_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
diff --git a/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp b/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
index 5822f48061..1dfab63136 100644
--- a/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
@@ -30,6 +30,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         SUB,
         MOD,
         FMOD,
+        POW,
     };
 
     class EltwiseOpBase : public CUDABackendNode {
@@ -62,7 +63,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
             csl::Workspace& workspace) override
         {
-            CV_Assert(inputs.size() >= 2);
             CV_Assert(outputs.size() == 1);
 
             CV_Assert(coeffs.size() == 0 || op == EltwiseOpType::SUM);
@@ -94,9 +94,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
                 case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, input_x, input_y); break;
                 case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, input_x, input_y); break;
+                case EltwiseOpType::POW: kernels::eltwise_pow_2<T>(stream, output, input_x, input_y); break;
                 }
-            }
-            else
+            } else if (inputs.size() == 1) {
+                auto input_wrapper_0 = inputs[0].dynamicCast<wrapper_type>();
+                auto input_0 = input_wrapper_0->getView();
+                csl::tensor_ops::copy(stream, output, input_0);
+            } else
             {
                 auto input_wrapper_0 = inputs[0].dynamicCast<wrapper_type>();
                 auto input_0 = input_wrapper_0->getView();
@@ -128,6 +132,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                     case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
                     case EltwiseOpType::MOD: kernels::eltwise_mod_2<T>(stream, output, output, input); break;
                     case EltwiseOpType::FMOD: kernels::eltwise_fmod_2<T>(stream, output, output, input); break;
+                    case EltwiseOpType::POW: kernels::eltwise_pow_2<T>(stream, output, output, input); break;
                     }
                 }
             }
diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp
index e3a8b2a583..659e7e29a8 100644
--- a/modules/dnn/src/layers/nary_eltwise_layers.cpp
+++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -44,13 +44,11 @@ public:
     std::vector<int> all_ndims;
     std::vector<std::vector<int>> orig_shapes;
     std::vector<std::vector<size_t>> orig_steps;
-    std::vector<char*> ptrs;
     std::vector<std::vector<int>> shapes;
     std::vector<std::vector<size_t>> steps;
     std::vector<size_t> elemsize;
 
-    NaryEltwiseHelper() {
-    }
+    NaryEltwiseHelper() {}
 
     void init(const std::vector<Mat>& inputs, const std::vector<Mat>& outputs)
     {
@@ -59,7 +57,6 @@ public:
         all_ndims.clear();
         orig_shapes.clear();
         orig_steps.clear();
-        ptrs.clear();
         shapes.clear();
         steps.clear();
         elemsize.clear();
@@ -81,7 +78,6 @@ public:
 
         shapes = std::vector<std::vector<int>>(narrays, std::vector<int>(max_ndims, 0));
         steps = std::vector<std::vector<size_t>>(narrays, std::vector<size_t>(max_ndims, 0));
-        ptrs = std::vector<char*>(narrays, nullptr);
 
         for(i = 0; i <= ninputs; i++) {
             all_ndims.push_back(i == 0 ? out_ndims : inp_ndims[i-1]);
@@ -183,6 +179,7 @@ public:
                 this->shapes[k][i] = 1;
             }
         }
+
         return true;
     }
 };
@@ -288,7 +285,7 @@ public:
 #ifdef HAVE_VULKAN
         if (backendId == DNN_BACKEND_VKCOM)
             return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::SUB ||
-                   op == OPERATION::DIV ;
+                   op == OPERATION::DIV;
 #endif
 
         if (backendId == DNN_BACKEND_CUDA) {
@@ -333,8 +330,16 @@ public:
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        if (op != OPERATION::POW) {
+            for (size_t i = 0; i < inputs.size(); i++) {
+                if (inputs[i].depth() != outputs[0].depth()) {
+                    CV_Error(Error::BadDepth, cv::format("NaryEltwiseLayer: Data type mismatch, input %zu of type %d, output of type %d", i, inputs[i].depth(), outputs[0].depth()));
+                }
+            }
+        }
+
         helper.init(inputs, outputs);
-        CV_Assert(helper.prepare_for_broadcast_op());
+        CV_CheckTrue(helper.prepare_for_broadcast_op(), "NaryEltwiseLayer: Preparation for broadcasting failed");
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -342,168 +347,234 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        MatShape outShape = findCommonShape(inputs);
-        outputs.assign(1, outShape);
+        if (inputs.size() == 1) {
+            outputs.assign(1, inputs.front());
+        } else {
+            MatShape outShape = findCommonShape(inputs);
+            outputs.assign(1, outShape);
+        }
         return false;
     }
 
     template <typename T, typename Functor>
-    void binary_forward_impl(
-            int ndims, const std::vector<int>& shape,
-            const char* data1, const std::vector<size_t>& step1,
-            const char* data2, const std::vector<size_t>& step2,
-            char* data, const std::vector<size_t>& step,
-            const Functor& op)
-    {
+    void binary_forward_impl(const Functor& op, int ndims, const std::vector<int>& shape,
+                             const char* data1, const std::vector<size_t>& step1,
+                             const char* data2, const std::vector<size_t>& step2,
+                             char* data, const std::vector<size_t>& step, size_t block_size) {
         assert(ndims >= 2);
-        size_t dp1 = step1[ndims-1]/sizeof(T);
-        size_t dp2 = step2[ndims-1]/sizeof(T);
-        size_t dp = step[ndims-1]/sizeof(T);
-        int k, n1 = shape[ndims-1], n2 = shape[ndims-2];
-        size_t plane_idx, nplanes = 1;
-        for (k = 0; k < ndims-2; k++) nplanes *= shape[k];
-
-        for (plane_idx = 0; plane_idx < nplanes; plane_idx++) {
-            const char* ptr1_ = data1;
-            const char* ptr2_ = data2;
-            char* ptr_ = data;
-            size_t idx = plane_idx;
-            for (k = ndims-3; k >= 0; k--) {
-                size_t next_idx = idx/shape[k];
-                int i_k = (int)(idx - next_idx*shape[k]);
-                ptr1_ += i_k*step1[k];
-                ptr2_ += i_k*step2[k];
-                ptr_ += i_k*step[k];
-                idx = next_idx;
-            }
-            for (int i2 = 0; i2 < n2; i2++, ptr1_ += step1[ndims-2],
-                                            ptr2_ += step2[ndims-2],
-                                            ptr_ += step[ndims-2])
-            {
-                const T* ptr1 = (const T*)ptr1_;
-                const T* ptr2 = (const T*)ptr2_;
-                T* ptr = (T*)ptr_;
+        size_t dp1 = step1.back() / sizeof(T);
+        size_t dp2 = step2.back() / sizeof(T);
+        size_t dp = step.back() / sizeof(T);
+        int plane_size = shape.back();
+        int nplanes = std::accumulate(shape.begin(), shape.end() - 1, 1, std::multiplies<int>());
+
+        if (nplanes == 1) { // parallelize within the plane
+            const T* ptr1 = (const T*)data1;
+            const T* ptr2 = (const T*)data2;
+            T* ptr = (T*)data;
+            auto worker = [&](const Range &r) {
                 if (dp1 == 1 && dp2 == 1 && dp == 1) {
-                    for(int i1 = 0; i1 < n1; i1++)
-                        ptr[i1] = op(ptr1[i1], ptr2[i1]);
+                    for(int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], ptr2[i]);
+                    }
                 } else if (dp1 == 1 && dp2 == 0 && dp == 1){
                     T x2 = *ptr2;
-                    for(int i1 = 0; i1 < n1; i1++)
-                        ptr[i1] = op(ptr1[i1], x2);
+                    for(int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], x2);
+                    }
                 } else if (dp1 == 0 && dp2 == 1 && dp == 1){
                     T x1 = *ptr1;
-                    for(int i1 = 0; i1 < n1; i1++)
-                        ptr[i1] = op(x1, ptr2[i1]);
+                    for(int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(x1, ptr2[i]);
+                    }
                 } else {
-                    for(int i1 = 0; i1 < n1; i1++, ptr1 += dp1, ptr2 += dp2, ptr += dp)
+                    for(int i = r.start; i < r.end; i++, ptr1 += dp1, ptr2 += dp2, ptr += dp) {
                         *ptr = op(*ptr1, *ptr2);
+                    }
                 }
-            }
+            };
+
+            double nstripes = plane_size * (1.0 / double(block_size));
+            parallel_for_(Range(0, plane_size), worker, nstripes);
+        } else { // parallelize across planes
+            auto worker = [&](const Range &r) {
+                for (int plane_idx = r.start; plane_idx < r.end; plane_idx++) {
+                    const char* ptr1_ = data1;
+                    const char* ptr2_ = data2;
+                    char* ptr_ = data;
+                    size_t idx = plane_idx;
+                    for (int k = ndims - 2; k >= 0; k--) {
+                        size_t next_idx = idx / shape[k];
+                        size_t i_k = (int)(idx - next_idx * shape[k]);
+                        ptr1_ += i_k * step1[k];
+                        ptr2_ += i_k * step2[k];
+                        ptr_ += i_k * step[k];
+                        idx = next_idx;
+                    }
+
+                    const T* ptr1 = (const T*)ptr1_;
+                    const T* ptr2 = (const T*)ptr2_;
+                    T* ptr = (T*)ptr_;
+                    if (dp1 == 1 && dp2 == 1 && dp == 1) {
+                        for(int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(ptr1[i], ptr2[i]);
+                        }
+                    } else if (dp1 == 1 && dp2 == 0 && dp == 1){
+                        T x2 = *ptr2;
+                        for(int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(ptr1[i], x2);
+                        }
+                    } else if (dp1 == 0 && dp2 == 1 && dp == 1){
+                        T x1 = *ptr1;
+                        for(int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(x1, ptr2[i]);
+                        }
+                    } else {
+                        for(int i = 0; i < plane_size; i++, ptr1 += dp1, ptr2 += dp2, ptr += dp) {
+                            *ptr = op(*ptr1, *ptr2);
+                        }
+                    }
+                }
+            };
+            double nstripes = nplanes * (1.0 / double(block_size));
+            parallel_for_(Range(0, nplanes), worker, nstripes);
         }
     }
 
+    /*
+        Elementwise binary operator (like +, -, x, /, etc.) which takes two operands
+    */
     template <typename T, typename Functor>
-    void binary_forward(const Functor& f, const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
-    {
+    void binary_forward(const Functor& f, const std::vector<Mat>& inputs, std::vector<Mat>& outputs, size_t block_size = 6e6) {
         const Mat& a = inputs[0];
         const Mat& b = inputs[1];
         Mat& out = outputs[0];
         CV_Assert(helper.shapes.size() == 3 && helper.steps.size() == 3);
-        binary_forward_impl<T, Functor>(
-                helper.max_ndims, helper.shapes[0], a.ptr<char>(), helper.steps[1],
-                b.ptr<char>(), helper.steps[2], out.ptr<char>(), helper.steps[0],
-                f);
+        binary_forward_impl<T, Functor>(f, helper.max_ndims, helper.shapes[0], a.ptr<char>(), helper.steps[1],
+                                        b.ptr<char>(), helper.steps[2], out.ptr<char>(), helper.steps[0], block_size);
     }
 
     template<typename T, typename Functor>
-    void nary_forward_impl(
-        const Functor& f, const T scale, int ninputs, int ndims, const std::vector<int>& shape,
-        const char** inp, char* out,
-        const std::vector<std::vector<size_t>>& steps, std::vector<char*>& ptrs)
-    {
+    void nary_forward_impl(const Functor& op, const T scale, int ninputs, int ndims, const std::vector<int>& shape,
+                           const char** inp, char* out, const std::vector<std::vector<size_t>>& steps, size_t block_size) {
         CV_Assert(ndims >= 2);
-        size_t dp = steps[0][ndims-1]/sizeof(T);
-        size_t dp1 = steps[1][ndims-1]/sizeof(T);
-        size_t dp2 = steps[2][ndims-1]/sizeof(T);
-
-        enum { BLOCK_SIZE = 1024 };
-        T blck[BLOCK_SIZE];
+        size_t dp  = steps[0].back() / sizeof(T);
+        size_t dp1 = steps[1].back() / sizeof(T);
+        size_t dp2 = steps[2].back() / sizeof(T);
 
-        int k, i, di1=0, n1 = shape[ndims-1], n2 = shape[ndims-2];
-        int second = ninputs == 1 ? 1 : 2;
-        size_t plane_idx, nplanes = 1;
-        for (k = 0; k < ndims-2; k++) nplanes *= shape[k];
+        int plane_size = shape.back();
+        int nplanes = std::accumulate(shape.begin(), shape.end() - 1, 1, std::multiplies<int>());
 
-        for (plane_idx = 0; plane_idx < nplanes; plane_idx++) {
+        if (nplanes == 1) { // parallelize within the plane
+            AutoBuffer<char> buf_ptrs(steps.size());
+            auto ptrs = (char**)buf_ptrs.data();
             ptrs[0] = out;
-            for (i = 0; i < ninputs; i++) ptrs[i+1] = (char*)inp[i];
-            size_t idx = plane_idx;
-            for (k = ndims-3; k >= 0; k--) {
-                size_t next_idx = idx/shape[k];
-                int i_k = (int)(idx - next_idx*shape[k]);
-                for (i = 0; i < ninputs; i++)
-                    ptrs[i] += i_k*steps[i][k];
-                idx = next_idx;
+            for (int i = 0; i < ninputs; i++) {
+                ptrs[i+1] = (char*)inp[i];
             }
-            for (int i2 = 0; i2 < n2; i2++)
-            {
-                const T* ptr1 = (const T*)(ptrs[1] + steps[1][ndims-2]*i2);
-                const T* ptr2 = (const T*)(ptrs[second] + steps[second][ndims-2]*i2);
-                T* ptr = (T*)(ptrs[0] + steps[0][ndims-2]*i2);
-                if (ninputs <= 2) {
-                    if (dp1 == 1 && dp2 == 1) {
-                        for (int i1 = 0; i1 < n1; i1++)
-                            ptr[i1] = saturate_cast<T>(f(ptr1[i1], ptr2[i1])*scale);
-                    } else {
-                        for(int i1 = 0; i1 < n1; i1++, ptr1 += dp1, ptr2 += dp2, ptr += dp)
-                            *ptr = saturate_cast<T>(f(*ptr1, *ptr2)*scale);
+            const T* ptr1 = (const T*)(ptrs[1]);
+            const T* ptr2 = (const T*)(ptrs[2]);
+            T* ptr = (T*)(ptrs[0]);
+            auto worker = [&](const Range &r) {
+                if (dp == 1 && dp1 == 1 && dp2 == 1) {
+                    for (int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], ptr2[i]);
                     }
-                } else {
-                    for (int i1 = 0; i1 < n1; i1 += di1, ptr += di1) {
-                        di1 = BLOCK_SIZE < n1-i1 ? BLOCK_SIZE : n1-i1;
-                        if (dp1 == 1 && dp2 == 1) {
-                            for (int j = 0; j < di1; j++)
-                                blck[j] = f(ptr1[j], ptr2[j]);
-                            ptr1 += di1;
-                            ptr2 += di1;
+                    for (int j = 2; j < ninputs; j++) {
+                        int dpj = steps[j + 1].back();
+                        const T* ptrj = (const T*)(ptrs[j + 1]);
+                        if (dpj == 1) {
+                            for (int i = r.start; i < r.end; i++) {
+                                ptr[i] = saturate_cast<T>(op(ptr[i], ptrj[i]) * scale);
+                            }
                         } else {
-                            for(int j = 0; j < di1; j++, ptr1 += dp1, ptr2 += dp2)
-                                blck[j] = f(*ptr1, *ptr2);
+                            for (int i = r.start; i < r.end; i++, ptrj += dpj) {
+                                ptr[i] = saturate_cast<T>(op(ptr[i], *ptrj) * scale);
+                            }
                         }
-                        for(i = 2; i < ninputs; i++) {
-                            int dp_i = steps[i+1][ndims-1]/sizeof(T);
-                            const T* ptr_i = (const T*)(ptrs[i+1] +
-                                    steps[i+1][ndims-2]*i2) + i1*dp_i;
-                            if (dp_i == 1) {
-                                if (i < ninputs-1) {
-                                    for (int j = 0; j < di1; j++)
-                                        blck[j] = f(blck[j], ptr_i[j]);
-                                } else {
-                                    for (int j = 0; j < di1; j++)
-                                        ptr[j] = saturate_cast<T>(f(blck[j], ptr_i[j]) * scale);
+                    }
+                } else {
+                    auto *tmp = ptr;
+                    for (int i = r.start; i < r.end; i++, ptr += dp, ptr1 += dp1, ptr2 += dp2) {
+                        *ptr = op(*ptr1, *ptr2);
+                    }
+                    ptr = tmp;
+                    for (int j = 2; j < ninputs; j++) {
+                        int dpj = steps[j + 1].back();
+                        const T* ptr_j = (const T*)(ptrs[j + 1]);
+                        for (int i = r.start; i < r.end; i++, ptr += dp, ptr_j += dpj) {
+                            *ptr = saturate_cast<T>(op(*ptr, *ptr_j) * scale);
+                        }
+                    }
+                }
+            };
+            double nstripes = plane_size * (1.0 / double(block_size));
+            parallel_for_(Range(0, plane_size), worker, nstripes);
+        } else { // parallelize across the plane
+            auto worker = [&](const Range &r) {
+                AutoBuffer<char> buf_ptrs(steps.size());
+                auto ptrs = (char**)buf_ptrs.data();
+                for (int plane_idx = r.start; plane_idx < r.end; plane_idx++) {
+                    ptrs[0] = out;
+                    for (int i = 0; i < ninputs; i++) ptrs[i+1] = (char*)inp[i];
+                    size_t idx = plane_idx;
+                    for (int k = ndims - 2; k >= 0; k--) {
+                        size_t next_idx = idx / shape[k];
+                        int i_k = (int)(idx - next_idx * shape[k]);
+                        for (int i = 0; i <= ninputs; i++) {
+                            ptrs[i] += i_k * steps[i][k];
+                        }
+                        idx = next_idx;
+                    }
+
+                    const T* ptr1 = (const T*)(ptrs[1]);
+                    const T* ptr2 = (const T*)(ptrs[2]);
+                    T* ptr = (T*)(ptrs[0]);
+                    if (dp == 1 && dp1 == 1 && dp2 == 1) {
+                        for (int i = 0; i < plane_size; i++) {
+                            ptr[i] = saturate_cast<T>(op(ptr1[i], ptr2[i]) * scale);
+                        }
+                        for (int j = 2; j < ninputs; j++) {
+                            int dpj = steps[j + 1].back();
+                            const T* ptrj = (const T*)(ptrs[j + 1]);
+                            if (dpj == 1) {
+                                for (int i = 0; i < plane_size; i++) {
+                                    ptr[i] = op(ptr[i], saturate_cast<T>(ptrj[i] * scale));
                                 }
                             } else {
-                                if (i < ninputs-1) {
-                                    for (int j = 0; j < di1; j++, ptr_i += dp_i)
-                                        blck[j] = f(blck[j], *ptr_i);
-                                } else {
-                                    for (int j = 0; j < di1; j++, ptr_i += dp_i)
-                                        ptr[j] = saturate_cast<T>(f(blck[j], *ptr_i) * scale);
+                                for (int i = 0; i < plane_size; i++, ptrj += dpj) {
+                                    ptr[i] = op(ptr[i], saturate_cast<T>(*ptrj * scale));
                                 }
                             }
                         }
+                    } else {
+                        auto *tmp = ptr;
+                        for (int i = 0; i < plane_size; i++, ptr += dp, ptr1 += dp1, ptr2 += dp2) {
+                            *ptr = saturate_cast<T>(op(*ptr1, *ptr2) * scale);
+                        }
+                        ptr = tmp;
+                        for (int j = 2; j < ninputs; j++) {
+                            int dpj = steps[j + 1].back();
+                            const T* ptrj = (const T*)(ptrs[j + 1]);
+                            for (int i = 0; i < plane_size; i++, ptr += dp, ptrj += dpj) {
+                                *ptr = op(*ptr, saturate_cast<T>(*ptrj * scale));
+                            }
+                        }
                     }
                 }
-            }
+            };
+            double nstripes = nplanes * (1.0 / double(block_size));
+            parallel_for_(Range(0, nplanes), worker, nstripes);
         }
     }
 
+    /*
+        Elementwise nary operator (like sum, mean, etc.) which takes at least one operand
+    */
     template <typename T, typename Functor>
-    void nary_forward(
-        const Functor& f, T scale,
-        const std::vector<Mat>& inputs, std::vector<Mat>& outputs
-        )
-    {
+    void nary_forward(const Functor& f, T scale,
+                      const std::vector<Mat>& inputs, std::vector<Mat>& outputs,
+                      size_t block_size = 6e6) {
         // collect all input info
         std::vector<const char*> v_inp;
         std::transform(inputs.begin(), inputs.end(), std::back_inserter(v_inp), [] (const Mat& m) { return m.template ptr<const char>(); });
@@ -512,13 +583,14 @@ public:
         // collect output info
         char* out = outputs[0].ptr<char>();
 
-        nary_forward_impl<T>(
-                f, scale, helper.ninputs, helper.max_ndims, helper.shapes[0], inp, out, helper.steps, helper.ptrs);
+        nary_forward_impl<T, Functor>(f, scale, helper.ninputs, helper.max_ndims, helper.shapes[0], inp, out, helper.steps, block_size);
     }
 
+    /*
+        Elementwise ternary operator (like where) which takes three operands
+    */
     template <typename T, typename Functor>
-    void trinary_forward(const Functor& f, const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
-    {
+    void ternary_forward(const Functor& f, const std::vector<Mat>& inputs, std::vector<Mat>& outputs, size_t block_size = 6e6) {
         const Mat& a = inputs[0];
         const Mat& b = inputs[1];
         const Mat& c = inputs[2];
@@ -526,69 +598,112 @@ public:
 
         CV_Assert(helper.shapes.size() == 4 && helper.steps.size() == 4);
 
-        trinary_forward_impl<T, Functor>(
-                helper.max_ndims, helper.shapes[0], a.ptr<char>(), helper.steps[1], b.ptr<char>(), helper.steps[2],
-                c.ptr<char>(), helper.steps[3], out.ptr<char>(), helper.steps[0],
-                f);
+        ternary_forward_impl<T, Functor>(f, helper.max_ndims, helper.shapes[0],
+                                         a.ptr<char>(), helper.steps[1],
+                                         b.ptr<char>(), helper.steps[2],
+                                         c.ptr<char>(), helper.steps[3],
+                                         out.ptr<char>(), helper.steps[0], block_size);
     }
 
     template <typename T, typename Functor>
-    void trinary_forward_impl(
-            int ndims, const std::vector<int>& shape,
+    void ternary_forward_impl(
+            const Functor& op, int ndims, const std::vector<int>& shape,
             const char* data1, const std::vector<size_t>& step1,
             const char* data2, const std::vector<size_t>& step2,
             const char* data3, const std::vector<size_t>& step3,
-            char* data, const std::vector<size_t>& step,
-            const Functor& op)
-    {
-        assert(ndims >= 2);
-        size_t dp1 = step1[ndims-1]/sizeof(T);
-        size_t dp2 = step2[ndims-1]/sizeof(T);
-        size_t dp3 = step3[ndims-1]/sizeof(T);
-        size_t dp = step[ndims-1]/sizeof(T);
-        int k, n1 = shape[ndims-1], n2 = shape[ndims-2];
-        size_t plane_idx, nplanes = 1;
-        for (k = 0; k < ndims-2; k++) nplanes *= shape[k];
-
-        for (plane_idx = 0; plane_idx < nplanes; plane_idx++)
-        {
-            const char* ptr1_ = data1;
-            const char* ptr2_ = data2;
-            const char* ptr3_ = data3;
-            char* ptr_ = data;
-            size_t idx = plane_idx;
-            for (k = ndims-3; k >= 0; k--)
-            {
-                size_t next_idx = idx/shape[k];
-                int i_k = (int)(idx - next_idx*shape[k]);
-                ptr1_ += i_k*step1[k];
-                ptr2_ += i_k*step2[k];
-                ptr3_ += i_k*step3[k];
-                ptr_ += i_k*step[k];
-                idx = next_idx;
-            }
-
-            for (int i2 = 0; i2 < n2; i2++, ptr1_ += step1[ndims-2],
-                                            ptr2_ += step2[ndims-2],
-                                            ptr3_ += step3[ndims-2],
-                                            ptr_ += step[ndims-2])
-            {
-                const T* ptr1 = (const T*)ptr1_;
-                const T* ptr2 = (const T*)ptr2_;
-                const T* ptr3 = (const T*)ptr3_;
-                T* ptr = (T*)ptr_;
-
-                if (dp1 == 1 && dp2 == 1 && dp3 == 1 && dp == 1)
-                {
-                    for(int i1 = 0; i1 < n1; i1++)
-                        ptr[i1] = op(ptr1[i1], ptr2[i1], ptr3[i1]);
-                }
-                else
-                {
-                    for(int i1 = 0; i1 < n1; i1++, ptr1 += dp1, ptr2 += dp2, ptr3 += dp3, ptr += dp)
+            char* data, const std::vector<size_t>& step, size_t block_size) {
+        CV_Assert(ndims >= 2);
+        size_t dp1 = step1.back() / sizeof(T);
+        size_t dp2 = step2.back() / sizeof(T);
+        size_t dp3 = step3.back() / sizeof(T);
+        size_t dp = step.back() / sizeof(T);
+        int plane_size = shape.back();
+        int nplanes = std::accumulate(shape.begin(), shape.end() - 1, 1, std::multiplies<int>());
+
+        if (nplanes == 1) { // parallelize within the plane
+            const T *ptr1 = (const T*)data1;
+            const T *ptr2 = (const T*)data2;
+            const T *ptr3 = (const T*)data3;
+            T* ptr = (T*)data;
+            auto worker = [&](const Range &r) {
+                if (dp1 == 1 && dp2 == 1 && dp3 == 1 && dp == 1) {
+                    for (int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], ptr2[i], ptr3[i]);
+                    }
+                } else if (dp1 == 0 && dp2 == 1 && dp3 == 1 && dp == 1){
+                    T x1 = *ptr1;
+                    for (int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(x1, ptr2[i], ptr3[i]);
+                    }
+                } else if (dp1 == 1 && dp2 == 0 && dp3 == 1 && dp == 1){
+                    T x2 = *ptr2;
+                    for (int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], x2, ptr3[i]);
+                    }
+                } else if (dp1 == 1 && dp2 == 1 && dp3 == 1 && dp == 1) {
+                    T x3 = *ptr3;
+                    for (int i = r.start; i < r.end; i++) {
+                        ptr[i] = op(ptr1[i], ptr2[i], x3);
+                    }
+                } else {
+                    for(int i = r.start; i < r.end; i++, ptr1 += dp1, ptr2 += dp2, ptr3 += dp3, ptr += dp) {
                         *ptr = op(*ptr1, *ptr2, *ptr3);
+                    }
                 }
-            }
+            };
+            double nstripes = plane_size * (1.0 / double(block_size));
+            parallel_for_(Range(0, plane_size), worker, nstripes);
+        } else { // parallelize across planes
+            auto worker = [&](const Range &r) {
+                for (int plane_idx = r.start; plane_idx < r.end; plane_idx++) {
+                    const char* ptr1_ = data1;
+                    const char* ptr2_ = data2;
+                    const char* ptr3_ = data3;
+                    char* ptr_ = data;
+                    size_t idx = plane_idx;
+                    for (int k = ndims - 2; k >= 0; k--)
+                    {
+                        size_t next_idx = idx / shape[k];
+                        int i_k = (int)(idx - next_idx * shape[k]);
+                        ptr1_ += i_k * step1[k];
+                        ptr2_ += i_k * step2[k];
+                        ptr3_ += i_k * step3[k];
+                        ptr_ += i_k * step[k];
+                        idx = next_idx;
+                    }
+
+                    const T *ptr1 = (const T*)ptr1_;
+                    const T *ptr2 = (const T*)ptr2_;
+                    const T *ptr3 = (const T*)ptr3_;
+                    T* ptr = (T*)ptr_;
+                    if (dp1 == 1 && dp2 == 1 && dp3 == 1 && dp == 1) {
+                        for (int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(ptr1[i], ptr2[i], ptr3[i]);
+                        }
+                    } else if (dp1 == 0 && dp2 == 1 && dp3 == 1 && dp == 1){
+                        T x1 = *ptr1;
+                        for (int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(x1, ptr2[i], ptr3[i]);
+                        }
+                    } else if (dp1 == 1 && dp2 == 0 && dp3 == 1 && dp == 1){
+                        T x2 = *ptr2;
+                        for (int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(ptr1[i], x2, ptr3[i]);
+                        }
+                    } else if (dp1 == 1 && dp2 == 1 && dp3 == 1 && dp == 1) {
+                        T x3 = *ptr3;
+                        for (int i = 0; i < plane_size; i++) {
+                            ptr[i] = op(ptr1[i], ptr2[i], x3);
+                        }
+                    } else {
+                        for(int i = 0; i < plane_size; i++, ptr1 += dp1, ptr2 += dp2, ptr3 += dp3, ptr += dp) {
+                            *ptr = op(*ptr1, *ptr2, *ptr3);
+                        }
+                    }
+                }
+            };
+            double nstripes = nplanes * (1.0 / double(block_size));
+            parallel_for_(Range(0, nplanes), worker, nstripes);
         }
     }
 
@@ -608,143 +723,147 @@ public:
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
-        // TODO: assert types
+        if (inputs.size() == 1) {
+            inputs[0].copyTo(outputs[0]);
+            return;
+        }
+
         typeDispatch(outputs[0].type(), inputs.size(), inputs, outputs);
     }
 
     template<typename T, typename... Args>
     inline void opDispatch(size_t ninputs, Args&&... args)
     {
-        switch (op)
-        {
-            case OPERATION::EQUAL:
-            {
-                auto equal = [](const T &a, const T &b) { return a == b; };
-                binary_forward<T>(equal, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::GREATER:
-            {
-                auto greater = [](const T &a, const T &b) { return a > b; };
-                binary_forward<T>(greater, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::GREATER_EQUAL:
-            {
-                auto greater_equal = [](const T &a, const T &b) { return a >= b; };
-                binary_forward<T>(greater_equal, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::LESS:
-            {
-                auto less = [](const T &a, const T &b) { return a < b; };
-                binary_forward<T>(less, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::LESS_EQUAL:
-            {
-                auto less_equal = [](const T &a, const T &b) { return a <= b; };
-                binary_forward<T>(less_equal, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::POW:
-            {
-                auto pow = [] (const T& a, const T& b) { return std::pow(a, b); };
-                binary_forward<T>(pow, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::BITSHIFT:
-            {
-                auto bitshift = [] (const uint8_t &a, const uint8_t &b) { return a << b; };
-                binary_forward<T>(bitshift, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::MAX:
-            {
-                auto max = [](const T &a, const T &b) { return std::max(a, b); };
-                nary_forward<T>(max, T{1}, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::MEAN:
-            {
-                auto mean = [](const T &a, const T &b) { return (a + b) / T{2}; };
-                nary_forward<T>(mean, T{1} / ninputs, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::MIN:
-            {
-                auto min = [](const T &a, const T &b) { return std::min(a, b); };
-                nary_forward<T>(min, T{1}, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::MOD:
-            {
-                auto mod = [] (const T &a, const T &b) { return static_cast<T>(_mod(int(a), int(b))); };
-                binary_forward<T>(mod, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::FMOD:
-            {
-                auto fmod = [](const T &a, const T &b) { return std::fmod(a, b); };
-                binary_forward<T>(fmod, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::PROD:
-            {
-                auto prod = [](const T &a, const T &b) { return a * b; };
-                binary_forward<T>(prod, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::SUB:
-            {
-                auto sub = [](const T &a, const T &b) { return a - b; };
-                binary_forward<T>(sub, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::SUM:
-            {
-                auto sum = [](const T &a, const T &b) { return a + b; };
-                nary_forward<T>(sum, T{1}, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::ADD:
-            {
-                auto add = [](const T &a, const T &b) { return a + b; };
-                binary_forward<T>(add, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::DIV:
-            {
-                auto div = [](const T &a, const T &b) { return a / b; };
-                binary_forward<T>(div, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::AND:
-            {
-                auto op_and = [](const uint8_t &a, const uint8_t &b) { return a & b; };
-                binary_forward<T>(op_and, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::OR:
-            {
-                auto op_or = [](const uint8_t &a, const uint8_t &b) { return a | b; };
-                binary_forward<T>(op_or, std::forward<Args>(args)...);
-                break;
-            }
-            case OPERATION::XOR:
-            {
-                auto op_xor = [](const uint8_t &a, const uint8_t &b) { return a ^ b; };
-                binary_forward<T>(op_xor, std::forward<Args>(args)...);
-                break;
+        if (ninputs == 2) { // Operators that take two operands
+            switch (op) {
+                case OPERATION::AND: {
+                    auto op_and = [](const uint8_t &a, const uint8_t &b) { return a & b; };
+                    binary_forward<T>(op_and, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::EQUAL: {
+                    auto equal = [](const T &a, const T &b) { return a == b; };
+                    binary_forward<T>(equal, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::GREATER: {
+                    auto greater = [](const T &a, const T &b) { return a > b; };
+                    binary_forward<T>(greater, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::GREATER_EQUAL: {
+                    auto greater_equal = [](const T &a, const T &b) { return a >= b; };
+                    binary_forward<T>(greater_equal, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::LESS: {
+                    auto less = [](const T &a, const T &b) { return a < b; };
+                    binary_forward<T>(less, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::LESS_EQUAL: {
+                    auto less_equal = [](const T &a, const T &b) { return a <= b; };
+                    binary_forward<T>(less_equal, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::OR: {
+                    auto op_or = [](const uint8_t &a, const uint8_t &b) { return a | b; };
+                    binary_forward<T>(op_or, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::POW: {
+                    auto pow = [] (const T& a, const T& b) { return std::pow(a, b); };
+                    binary_forward<T>(pow, std::forward<Args>(args)..., 1e5);
+                    break;
+                }
+                case OPERATION::XOR: {
+                    auto op_xor = [](const uint8_t &a, const uint8_t &b) { return a ^ b; };
+                    binary_forward<T>(op_xor, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::BITSHIFT: {
+                    auto bitshift = [] (const uint8_t &a, const uint8_t &b) { return a << b; };
+                    binary_forward<T>(bitshift, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MAX: {
+                    auto max = [](const T &a, const T &b) { return std::max(a, b); };
+                    binary_forward<T>(max, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MEAN: {
+                    auto mean = [](const T &a, const T &b) { return (a + b) / T{2}; };
+                    binary_forward<T>(mean, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MIN: {
+                    auto min = [](const T &a, const T &b) { return std::min(a, b); };
+                    binary_forward<T>(min, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MOD: {
+                    auto mod = [] (const T &a, const T &b) { return static_cast<T>(_mod(int(a), int(b))); };
+                    binary_forward<T>(mod, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::FMOD: {
+                    auto fmod = [](const T &a, const T &b) { return std::fmod(a, b); };
+                    binary_forward<T>(fmod, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::PROD: {
+                    auto prod = [](const T &a, const T &b) { return a * b; };
+                    binary_forward<T>(prod, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::SUB: {
+                    auto sub = [](const T &a, const T &b) { return a - b; };
+                    binary_forward<T>(sub, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::ADD:
+                case OPERATION::SUM: {
+                    auto sum = [](const T &a, const T &b) { return a + b; };
+                    binary_forward<T>(sum, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::DIV: {
+                    auto div = [](const T &a, const T &b) { return a / b; };
+                    binary_forward<T>(div, std::forward<Args>(args)...);
+                    break;
+                }
+                default: CV_Error(Error::StsBadArg, "Unsupported operation");
             }
-            case OPERATION::WHERE:
+        } else if (ninputs == 3 && op == OPERATION::WHERE) { // Operators that take three operands
+            auto where = [](const T &a, const T &b, const T &c) { return a ? b : c; };
+            ternary_forward<T>(where, std::forward<Args>(args)...);
+        } else { // Operators that can take multiple (>= 3) operands
+            switch (op)
             {
-                auto op_where = [](const T &a, const T &b, const T &c) { return a ? b : c; };
-                trinary_forward<T>(op_where, std::forward<Args>(args)...);
-                break;
+                case OPERATION::MAX: {
+                    auto max = [](const T &a, const T &b) { return std::max(a, b); };
+                    nary_forward<T>(max, T{1}, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MEAN: {
+                    // Sum up inputs and then calculate mean by scale = 1 / ninputs
+                    auto sum = [](const T &a, const T &b) { return a + b; };
+                    nary_forward<T>(sum, T{1} / ninputs, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::MIN: {
+                    auto min = [](const T &a, const T &b) { return std::min(a, b); };
+                    nary_forward<T>(min, T{1}, std::forward<Args>(args)...);
+                    break;
+                }
+                case OPERATION::SUM: {
+                    auto sum = [](const T &a, const T &b) { return a + b; };
+                    nary_forward<T>(sum, T{1}, std::forward<Args>(args)...);
+                    break;
+                }
+                default:
+                    CV_Error(Error::StsBadArg, "Unsupported operation.");
             }
-            default:
-                CV_Error(Error::StsBadArg, "Unsupported operation.");
         };
     }
 
@@ -811,6 +930,9 @@ public:
             case OPERATION::FMOD:
                 op_ = cuda4dnn::EltwiseOpType::FMOD;
                 break;
+            case OPERATION::POW:
+                op_ = cuda4dnn::EltwiseOpType::POW;
+                break;
             default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
         };
 
@@ -881,6 +1003,15 @@ public:
 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
+        // In case only one input
+        if (inputs.size() == 1) {
+            auto &ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+            ngraph::OutputVector inp{ieInpNode};
+            auto blank = std::make_shared<ov::op::v0::Concat>(inp, 0);
+            return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
+        }
+
+        // TODO: Support multiple (>=3) inputs
         CV_Assert(inputs.size() == 2);
         auto& inp0 = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         auto& inp1 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 565a88b760..3745d7ed86 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -2851,14 +2851,6 @@ void ONNXImporter::parseElementWise(LayerParams& layerParams, const opencv_onnx:
         };
     }
 
-    // element-wise layers that can have >=1 inputs but actually have one input
-    if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
-    {
-        layerParams.type = "Identity";
-        addLayer(layerParams, node_proto);
-        return;
-    }
-
     auto pre_broadcast_transform = [](Mat& t, int t_real_ndims) {
         if (t.dims == 2 && t_real_ndims == 1 && t.size[1] == 1)
             transpose(t, t);
@@ -3938,7 +3930,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
             dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
             dispatch["LessOrEqual"] = dispatch["Mod"] = &ONNXImporter::parseElementWise;
 
-    dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
+    dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = dispatch["Mean"] = &ONNXImporter::parseElementWise;
     dispatch["Where"] = &ONNXImporter::parseElementWise;
     dispatch["Range"] = &ONNXImporter::parseRange;
     dispatch["Einsum"] = &ONNXImporter::parseEinsum;
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 1ca3f2f75b..bd892adb2f 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -970,6 +970,7 @@ public:
 #endif
 #ifdef HAVE_CUDA
     static std::set<std::string> cuda_deny_list;
+    static std::set<std::string> cuda_fp16_deny_list;
 #endif
 
     Test_ONNX_conformance()
@@ -1055,6 +1056,9 @@ public:
         cuda_deny_list = {
             #include "test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp"
         };
+        cuda_fp16_deny_list = {
+            #include "test_onnx_conformance_layer_filter__cuda_fp16_denylist.inl.hpp"
+        };
 #endif
     }
 
@@ -1074,6 +1078,7 @@ std::set<std::string> Test_ONNX_conformance::vulkan_deny_list;
 #endif
 #ifdef HAVE_CUDA
 std::set<std::string> Test_ONNX_conformance::cuda_deny_list;
+std::set<std::string> Test_ONNX_conformance::cuda_fp16_deny_list;
 #endif
 
 TEST_P(Test_ONNX_conformance, Layer_Test)
@@ -1114,6 +1119,10 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         {
             applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU, CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
         }
+
+        if (name == "test_pow") {
+            default_lInf = 0.00013; // Expected: (normInf) <= (lInf), actual: 0.00012207 vs 0.0001
+        }
     }
 #ifdef HAVE_HALIDE
     else if (backend == DNN_BACKEND_HALIDE)
@@ -1142,10 +1151,14 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
 #ifdef HAVE_CUDA
     else if (backend == DNN_BACKEND_CUDA)
     {
-        if (cuda_deny_list.find(name) != cuda_deny_list.end())
+        if (target == DNN_TARGET_CUDA && cuda_deny_list.find(name) != cuda_deny_list.end())
         {
             applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
         }
+        if (target == DNN_TARGET_CUDA_FP16 && cuda_fp16_deny_list.find(name) != cuda_fp16_deny_list.end())
+        {
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
+        }
     }
 #endif
     else
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
index 96778ef5d4..42968ef721 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
@@ -73,21 +73,9 @@
 "test_maxunpool_export_with_output_shape",
 "test_mul_bcast",
 "test_mul_uint8",
-"test_reduce_prod_default_axes_keepdims_example", // FP16 only
-"test_reduce_prod_default_axes_keepdims_random", // FP16 only
-"test_reduce_prod_do_not_keepdims_random", // FP16 only
-"test_reduce_prod_keepdims_random", // FP16 only
-"test_reduce_prod_negative_axes_keepdims_random", // FP16 only
-"test_reduce_sum_square_default_axes_keepdims_random", // FP16 only
-"test_reduce_sum_square_do_not_keepdims_random", // FP16 only
-"test_reduce_sum_square_keepdims_random", // FP16 only
-"test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only
 "test_softmax_default_axis",
-"test_softmax_large_number",  // FP16 only
-"test_softmax_large_number_expanded",  // FP16 only
 "test_sub_bcast",
 "test_sub_uint8",
-"test_tan",  // FP16 only
 "test_upsample_nearest",
 "test_scatter_elements_with_axis",
 "test_scatter_elements_with_duplicate_indices",
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_fp16_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_fp16_denylist.inl.hpp
new file mode 100644
index 0000000000..4fe0825632
--- /dev/null
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_fp16_denylist.inl.hpp
@@ -0,0 +1,19 @@
+"test_basic_conv_with_padding", // (assert failed) !blobs.empty() in initCUDA
+"test_basic_conv_without_padding", // (assert failed) !blobs.empty() in initCUDA
+"test_conv_with_autopad_same", // (assert failed) !blobs.empty() in initCUDA
+"test_conv_with_strides_and_asymmetric_padding", // (assert failed) !blobs.empty() in initCUDA
+"test_conv_with_strides_no_padding", // (assert failed) !blobs.empty() in initCUDA
+"test_conv_with_strides_padding", // (assert failed) !blobs.empty() in initCUDA
+"test_dropout_default_ratio",
+"test_logsoftmax_large_number", // fp16 accuracy issue
+"test_logsoftmax_large_number_expanded", // fp16 accuracy issue
+"test_reduce_prod_default_axes_keepdims_example", // fallback to cpu, accuracy
+"test_reduce_prod_default_axes_keepdims_random", // fallback to cpu, accuracy
+"test_reduce_sum_square_default_axes_keepdims_random", // fallback to cpu, accuracy
+"test_reduce_sum_square_do_not_keepdims_random", // fallback to cpu, accuracy
+"test_reduce_sum_square_keepdims_random", // fallback to cpu, accuracy
+"test_reduce_sum_square_negative_axes_keepdims_random", // fallback to cpu, accuracy
+"test_pow", // fp16 accuracy issue
+"test_softmax_large_number", // fp16 accuracy issue
+"test_softmax_large_number_expanded", // fp16 accuracy issue
+"test_tan", // fp16 accuracy issue
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
index 000e867217..229bb9ca82 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
@@ -86,7 +86,11 @@ CASE(test_adam)
 CASE(test_adam_multiple)
     // no filter
 CASE(test_add)
-    // no filter
+    if (target == DNN_TARGET_OPENCL)
+    {
+        default_l1 = 0.00024;  // Expected: (normL1) <= (l1), actual: 0.000234754 vs 1e-05
+        default_lInf = 0.0011;  // Expected: (normInf) <= (lInf), actual: 0.00106502 vs 0.0001
+    }
 CASE(test_add_bcast)
 #if SKIP_SET_1
     SKIP;
@@ -1110,7 +1114,11 @@ CASE(test_momentum)
 CASE(test_momentum_multiple)
     // no filter
 CASE(test_mul)
-    // no filter
+    if (target == DNN_TARGET_OPENCL)
+    {
+        default_l1 = 0.00024; // Expected: (normL1) <= (l1), actual: 0.00023824 vs 1e-05
+        default_lInf = 0.0015; // Expected: (normInf) <= (lInf), actual: 0.00145674 vs 0.0001
+    }
 CASE(test_mul_bcast)
 #if SKIP_SET_1
     SKIP;
@@ -1262,7 +1270,7 @@ CASE(test_or_bcast4v3d)
 CASE(test_or_bcast4v4d)
     // no filter
 CASE(test_pow)
-    // no filter
+    SKIP_OPENCL_FP16;
 CASE(test_pow_bcast_array)
     // no filter
 CASE(test_pow_bcast_scalar)
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp
index f87e16a42f..968dd1e025 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp
@@ -68,6 +68,9 @@
 "test_maxunpool_export_with_output_shape",
 "test_maxunpool_export_without_output_shape",
 "test_mul_uint8",
+"test_pow_types_float32_int32", // vulkan backend does not take tensor other than float32 data type
+"test_pow_types_float32_int64", // vulkan backend does not take tensor other than float32 data type
+"test_pow_types_int", // vulkan backend does not take tensor other than float32 data type
 "test_softmax_default_axis",
 "test_sub_bcast",
 "test_sub_uint8",
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp
index 0da0111990..0370b22764 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp
@@ -50,7 +50,7 @@
 "test_maxpool_with_argmax_2d_precomputed_strides",
 "test_maxunpool_export_with_output_shape",  // exception during net.forward() call
 "test_mul_uint8",  // output type mismatch
-"test_sub_bcast",
+"test_sub_bcast", // 1d support is required
 "test_sub_uint8",  // output type mismatch
 "test_upsample_nearest",
 "test_div_bcast", // remove when 1D Mat is supported
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp
index 9b6b2414db..7303348d10 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp
@@ -14,6 +14,7 @@
 "test_maxpool_2d_same_upper",
 "test_maxpool_2d_strides",
 "test_maxpool_3d_default",
+"test_pow", // fp16 accuracy issue
 "test_softmax_large_number",
 "test_softmax_large_number_expanded",
 "test_split_equal_parts_1d",
diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
index cb008e9670..243c7e704d 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
@@ -93,7 +93,6 @@
 "test_dequantizelinear_axis",
 "test_det_2d",
 "test_det_nd",
-"test_div_example",
 "test_dropout_default_mask",
 "test_dropout_default_mask_ratio",
 "test_dynamicquantizelinear",
@@ -175,50 +174,34 @@
 "test_lstm_with_initial_bias",
 "test_lstm_with_peepholes",
 "test_matmulinteger",
-"test_max_example",
-"test_max_float16",
-"test_max_float32",
-"test_max_float64",
-"test_max_int16",
-"test_max_int32",
-"test_max_int64",
-"test_max_int8",
-"test_max_one_input",
-"test_max_two_inputs",
-"test_max_uint16",
-"test_max_uint32",
-"test_max_uint64",
-"test_max_uint8",
-"test_mean_example",
-"test_mean_one_input",
-"test_mean_two_inputs",
-"test_min_example",
-"test_min_float16",
-"test_min_float32",
-"test_min_float64",
-"test_min_int16",
-"test_min_int32",
-"test_min_int64",
-"test_min_int8",
-"test_min_one_input",
-"test_min_two_inputs",
-"test_min_uint16",
-"test_min_uint32",
-"test_min_uint64",
-"test_min_uint8",
-"test_mod_broadcast",
-"test_mod_int64_fmod",
-"test_mod_mixed_sign_int16",
-"test_mod_mixed_sign_int32",
-"test_mod_mixed_sign_int64",
-"test_mod_mixed_sign_int8",
-"test_mod_uint16",
-"test_mod_uint32",
-"test_mod_uint64",
-"test_mod_uint8",
+"test_max_int16", // output type (int16) mismatched
+"test_max_int32", // output type (int32) mismatched
+"test_max_int64", // output type (int64) mismatched
+"test_max_int8",  // output type (int8) mismatched
+"test_max_uint16", // output type (uint16) mismatched
+"test_max_uint32", // output type (uint32) mismatched
+"test_max_uint64", // output type (uint64) mismatched
+"test_max_uint8",  // output type (uint8) mismatched
+"test_min_int16", // output type (int16) mismatched
+"test_min_int32", // output type (int32) mismatched
+"test_min_int64", // output type (int64) mismatched
+"test_min_int8",  // output type (int8) mismatched
+"test_min_uint16", // output type (uint16) mismatched
+"test_min_uint32", // output type (uint32) mismatched
+"test_min_uint64", // output type (uint64) mismatched
+"test_min_uint8",  // output type (uint8) mismatched
+"test_mod_broadcast",  // output type (int32) mismatched
+"test_mod_int64_fmod", // output type (int64) mismatched
+"test_mod_mixed_sign_int16", // unsupported data type (int16)
+"test_mod_mixed_sign_int32", // output type (int32) mismatched
+"test_mod_mixed_sign_int64", // output type (int64) mismatched
+"test_mod_mixed_sign_int8",  // output type (int8) mismatched
+"test_mod_uint16", // unsupported data type (uint16)
+"test_mod_uint32", // unsupported data type (uint32)
+"test_mod_uint64", // unsupported data type (uint32)
+"test_mod_uint8",  // output type (int8) mismatched
 "test_momentum",
 "test_momentum_multiple",
-"test_mul_example",
 "test_mvn",
 "test_mvn_expanded",
 "test_nesterov_momentum",
@@ -287,20 +270,14 @@
 "test_or_bcast4v2d",
 "test_or_bcast4v3d",
 "test_or_bcast4v4d",
-"test_pow",
-"test_pow_bcast_array",
-"test_pow_bcast_scalar",
-"test_pow_example",
-"test_pow_types_float",
-"test_pow_types_float32_int32",
-"test_pow_types_float32_int64",
-"test_pow_types_float32_uint32",
-"test_pow_types_float32_uint64",
-"test_pow_types_int",
-"test_pow_types_int32_float32",
-"test_pow_types_int32_int32",
-"test_pow_types_int64_float32",
-"test_pow_types_int64_int64",
+"test_pow_bcast_array", // 1d support is required
+"test_pow_types_float", // output type (int64) mismatched
+"test_pow_types_float32_uint32", // exponent of unsupported data type (uint32)
+"test_pow_types_float32_uint64", // exponent of unsupported data type (uint64)
+"test_pow_types_int32_float32", // output type (int32) mismatched
+"test_pow_types_int32_int32", // output type (int32) mismatched
+"test_pow_types_int64_float32", // output type (int64) mismatched
+"test_pow_types_int64_int64", // output type (int64) mismatched
 "test_prelu_broadcast",
 "test_prelu_example",
 "test_qlinearconv",
@@ -468,9 +445,6 @@
 "test_strnormalizer_export_monday_empty_output",
 "test_strnormalizer_export_monday_insensintive_upper_twodim",
 "test_strnormalizer_nostopwords_nochangecase",
-"test_sub_example",
-"test_sum_example",
-"test_sum_two_inputs",
 "test_tfidfvectorizer_tf_batch_onlybigrams_skip0",
 "test_tfidfvectorizer_tf_batch_onlybigrams_skip5",
 "test_tfidfvectorizer_tf_batch_uniandbigrams_skip5",
@@ -519,8 +493,8 @@
 "test_unsqueeze_three_axes",
 "test_unsqueeze_two_axes",
 "test_unsqueeze_unsorted_axes",
-"test_where_example",
-"test_where_long_example",
+"test_where_example", // input of unsupported data type (bool)
+"test_where_long_example", // input of unsupported data type (bool)
 "test_xor2d",
 "test_xor3d",
 "test_xor4d",

From 934e6899f87badc50145c2cef5941d8422ac8e0f Mon Sep 17 00:00:00 2001
From: zihaomu <zihaomu@outlook.com>
Date: Wed, 3 Jul 2024 15:58:25 +0800
Subject: [PATCH 09/39] Merge pull request #25809 from zihaomu:imread_rgb_flag

imgcodecs: Add rgb flag for imread and imdecode #25809

Try to `imread` images by RGB to save R-B swapping costs.

## How to use it?
```
img_rgb = cv2.imread("PATH", IMREAD_COLOR_RGB) # OpenCV decode the image by RGB format.
```

## TODO
- [x] Fix the broken code
- [x] Add imread rgb test
- [x] Speed test of rgb mode.

## Performance test

| file name | IMREAD_COLOR  | IMREAD_COLOR_RGB |
| --------- | ------ | --------- |
| jpg01     | 284 ms | 277 ms    |
| jpg02     | 376 ms | 366 ms    |
| png01     | 62 ms  | 60 ms     |
| Png02     | 97 ms  | 94 ms     |

Test with [image_test.zip](https://github.com/user-attachments/files/15982949/image_test.zip)
```.cpp
string img_path = "/Users/mzh/work/data/image_test/png02.png";
int loop = 20;

TickMeter t;

double t0 = 10000;
for (int i = 0; i < loop; i++)
{
    t.reset();
    t.start();
    img_bgr = imread(img_path, IMREAD_COLOR);
    t.stop();

    if (t.getTimeMilli() < t0) t0 = t.getTimeMilli();
}

std::cout<<"bgr time = "<<t0<<std::endl;

t0 = 10000;
for (int i = 0; i < loop; i++)
{
    t.reset();
    t.start();
    img_rgb = imread(img_path, IMREAD_COLOR_RGB);
    t.stop();
    if (t.getTimeMilli() < t0) t0 = t.getTimeMilli();
}
std::cout<<"rgb time = "<<t0<<std::endl;
```
### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../imgcodecs/include/opencv2/imgcodecs.hpp   |  10 +-
 modules/imgcodecs/perf/perf_jpeg.cpp          |  17 +++
 modules/imgcodecs/perf/perf_png.cpp           |  17 +++
 modules/imgcodecs/src/grfmt_avif.cpp          |   9 +-
 modules/imgcodecs/src/grfmt_base.cpp          |   6 +
 modules/imgcodecs/src/grfmt_base.hpp          |   3 +
 modules/imgcodecs/src/grfmt_bmp.cpp           |   5 +
 modules/imgcodecs/src/grfmt_exr.cpp           | 103 +++++++++++++++---
 modules/imgcodecs/src/grfmt_exr.hpp           |   1 +
 modules/imgcodecs/src/grfmt_gdal.cpp          |   4 +-
 modules/imgcodecs/src/grfmt_hdr.cpp           |   8 +-
 modules/imgcodecs/src/grfmt_jpeg.cpp          |  20 +++-
 modules/imgcodecs/src/grfmt_jpeg2000.cpp      |   7 +-
 .../imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp |  18 +--
 modules/imgcodecs/src/grfmt_pam.cpp           |  62 +++++++----
 modules/imgcodecs/src/grfmt_pfm.cpp           |   2 +-
 modules/imgcodecs/src/grfmt_png.cpp           |   2 +-
 modules/imgcodecs/src/grfmt_pxm.cpp           |   4 +-
 modules/imgcodecs/src/grfmt_spng.cpp          |  16 +--
 modules/imgcodecs/src/grfmt_sunras.cpp        |   4 +-
 modules/imgcodecs/src/grfmt_tiff.cpp          |  33 ++++--
 modules/imgcodecs/src/grfmt_webp.cpp          |  16 ++-
 modules/imgcodecs/src/loadsave.cpp            |  23 +++-
 modules/imgcodecs/src/utils.cpp               |  19 ++++
 modules/imgcodecs/src/utils.hpp               |   2 +
 modules/imgcodecs/test/test_avif.cpp          |   8 +-
 modules/imgcodecs/test/test_exr.impl.hpp      |   9 ++
 modules/imgcodecs/test/test_grfmt.cpp         |   5 +
 modules/imgcodecs/test/test_jpeg.cpp          |   8 ++
 modules/imgcodecs/test/test_png.cpp           |  16 +++
 modules/imgcodecs/test/test_precomp.hpp       |   6 +
 modules/imgcodecs/test/test_read_write.cpp    |  11 ++
 modules/imgcodecs/test/test_tiff.cpp          |  23 +++-
 modules/imgcodecs/test/test_webp.cpp          |   6 +
 34 files changed, 408 insertions(+), 95 deletions(-)

diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index eba25ce1cf..c3a1d4b082 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -68,7 +68,8 @@ namespace cv
 enum ImreadModes {
        IMREAD_UNCHANGED            = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). Ignore EXIF orientation.
        IMREAD_GRAYSCALE            = 0,  //!< If set, always convert image to the single channel grayscale image (codec internal conversion).
-       IMREAD_COLOR                = 1,  //!< If set, always convert image to the 3 channel BGR color image.
+       IMREAD_COLOR_BGR            = 1,  //!< If set, always convert image to the 3 channel BGR color image.
+       IMREAD_COLOR                = 1,  //!< Same as IMREAD_COLOR_BGR.
        IMREAD_ANYDEPTH             = 2,  //!< If set, return 16-bit/32-bit image when the input has the corresponding depth, otherwise convert it to 8-bit.
        IMREAD_ANYCOLOR             = 4,  //!< If set, the image is read in any possible color format.
        IMREAD_LOAD_GDAL            = 8,  //!< If set, use the gdal driver for loading the image.
@@ -78,7 +79,8 @@ enum ImreadModes {
        IMREAD_REDUCED_COLOR_4      = 33, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/4.
        IMREAD_REDUCED_GRAYSCALE_8  = 64, //!< If set, always convert image to the single channel grayscale image and the image size reduced 1/8.
        IMREAD_REDUCED_COLOR_8      = 65, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/8.
-       IMREAD_IGNORE_ORIENTATION   = 128 //!< If set, do not rotate the image according to EXIF's orientation flag.
+       IMREAD_IGNORE_ORIENTATION   = 128, //!< If set, do not rotate the image according to EXIF's orientation flag.
+       IMREAD_COLOR_RGB            = 256, //!< If set, always convert image to the 3 channel RGB color image.
      };
 
 //! Imwrite flags
@@ -268,7 +270,7 @@ Currently, the following file formats are supported:
 @param filename Name of file to be loaded.
 @param flags Flag that can take values of cv::ImreadModes
 */
-CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR );
+CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR_BGR );
 
 /** @brief Loads an image from a file.
 
@@ -279,7 +281,7 @@ This is an overloaded member function, provided for convenience. It differs from
 @note
 The image passing through the img parameter can be pre-allocated. The memory is reused if the shape and the type match with the load image.
  */
-CV_EXPORTS_W void imread( const String& filename, OutputArray dst, int flags = IMREAD_COLOR );
+CV_EXPORTS_W void imread( const String& filename, OutputArray dst, int flags = IMREAD_COLOR_BGR );
 
 /** @brief Loads a multi-page image from a file.
 
diff --git a/modules/imgcodecs/perf/perf_jpeg.cpp b/modules/imgcodecs/perf/perf_jpeg.cpp
index 7063ca909c..16610a3044 100644
--- a/modules/imgcodecs/perf/perf_jpeg.cpp
+++ b/modules/imgcodecs/perf/perf_jpeg.cpp
@@ -27,6 +27,23 @@ PERF_TEST(JPEG, Decode)
     SANITY_CHECK_NOTHING();
 }
 
+PERF_TEST(JPEG, Decode_rgb)
+{
+    String filename = getDataPath("stitching/boat1.jpg");
+
+    FILE *f = fopen(filename.c_str(), "rb");
+    fseek(f, 0, SEEK_END);
+    long len = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    vector<uchar> file_buf((size_t)len);
+    EXPECT_EQ(len, (long)fread(&file_buf[0], 1, (size_t)len, f));
+    fclose(f); f = NULL;
+
+    TEST_CYCLE() imdecode(file_buf, IMREAD_COLOR_RGB);
+
+    SANITY_CHECK_NOTHING();
+}
+
 PERF_TEST(JPEG, Encode)
 {
     String filename = getDataPath("stitching/boat1.jpg");
diff --git a/modules/imgcodecs/perf/perf_png.cpp b/modules/imgcodecs/perf/perf_png.cpp
index 1af4780882..5bbef590bd 100644
--- a/modules/imgcodecs/perf/perf_png.cpp
+++ b/modules/imgcodecs/perf/perf_png.cpp
@@ -30,6 +30,23 @@ PERF_TEST(PNG, decode)
     SANITY_CHECK_NOTHING();
 }
 
+PERF_TEST(PNG, decode_rgb)
+{
+    String filename = getDataPath("perf/2560x1600.png");
+
+    FILE *f = fopen(filename.c_str(), "rb");
+    fseek(f, 0, SEEK_END);
+    long len = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    vector<uchar> file_buf((size_t)len);
+    EXPECT_EQ(len, (long)fread(&file_buf[0], 1, (size_t)len, f));
+    fclose(f); f = NULL;
+
+    TEST_CYCLE() imdecode(file_buf, IMREAD_COLOR_RGB);
+
+    SANITY_CHECK_NOTHING();
+}
+
 PERF_TEST(PNG, encode)
 {
     String filename = getDataPath("perf/2560x1600.png");
diff --git a/modules/imgcodecs/src/grfmt_avif.cpp b/modules/imgcodecs/src/grfmt_avif.cpp
index 4b39ada60a..d71843c3f0 100644
--- a/modules/imgcodecs/src/grfmt_avif.cpp
+++ b/modules/imgcodecs/src/grfmt_avif.cpp
@@ -33,7 +33,7 @@ struct AvifImageDeleter {
 
 using AvifImageUniquePtr = std::unique_ptr<avifImage, AvifImageDeleter>;
 
-avifResult CopyToMat(const avifImage *image, int channels, Mat *mat) {
+avifResult CopyToMat(const avifImage *image, int channels, bool useRGB , Mat *mat) {
   CV_Assert((int)image->height == mat->rows);
   CV_Assert((int)image->width == mat->cols);
   if (channels == 1) {
@@ -53,7 +53,10 @@ avifResult CopyToMat(const avifImage *image, int channels, Mat *mat) {
   avifRGBImage rgba;
   avifRGBImageSetDefaults(&rgba, image);
   if (channels == 3) {
-    rgba.format = AVIF_RGB_FORMAT_BGR;
+      if (useRGB)
+          rgba.format = AVIF_RGB_FORMAT_RGB;
+      else
+          rgba.format = AVIF_RGB_FORMAT_BGR;
   } else {
     CV_Assert(channels == 4);
     rgba.format = AVIF_RGB_FORMAT_BGRA;
@@ -227,7 +230,7 @@ bool AvifDecoder::readData(Mat &img) {
     is_first_image_ = false;
   }
 
-  if (CopyToMat(decoder_->image, channels_, &read_img) != AVIF_RESULT_OK) {
+  if (CopyToMat(decoder_->image, channels_, m_use_rgb, &read_img) != AVIF_RESULT_OK) {
     CV_Error(Error::StsInternal, "Cannot convert from AVIF to Mat");
     return false;
   }
diff --git a/modules/imgcodecs/src/grfmt_base.cpp b/modules/imgcodecs/src/grfmt_base.cpp
index 88f1c04f30..da343941bd 100644
--- a/modules/imgcodecs/src/grfmt_base.cpp
+++ b/modules/imgcodecs/src/grfmt_base.cpp
@@ -53,6 +53,7 @@ BaseImageDecoder::BaseImageDecoder()
     m_type = -1;
     m_buf_supported = false;
     m_scale_denom = 1;
+    m_use_rgb = false;
 }
 
 
@@ -94,6 +95,11 @@ int BaseImageDecoder::setScale( const int& scale_denom )
     return temp;
 }
 
+void BaseImageDecoder::setRGB(bool useRGB)
+{
+    m_use_rgb = useRGB;
+}
+
 ImageDecoder BaseImageDecoder::newDecoder() const
 {
     return ImageDecoder();
diff --git a/modules/imgcodecs/src/grfmt_base.hpp b/modules/imgcodecs/src/grfmt_base.hpp
index 816bef98fb..0d98c51ae2 100644
--- a/modules/imgcodecs/src/grfmt_base.hpp
+++ b/modules/imgcodecs/src/grfmt_base.hpp
@@ -73,6 +73,8 @@ public:
     virtual bool readHeader() = 0;
     virtual bool readData( Mat& img ) = 0;
 
+    virtual void setRGB(bool useRGB);
+
     /// Called after readData to advance to the next page, if any.
     virtual bool nextPage() { return false; }
 
@@ -89,6 +91,7 @@ protected:
     String m_signature;
     Mat m_buf;
     bool m_buf_supported;
+    bool m_use_rgb;       // flag of decode image as RGB order instead of BGR.
     ExifReader m_exif;
 };
 
diff --git a/modules/imgcodecs/src/grfmt_bmp.cpp b/modules/imgcodecs/src/grfmt_bmp.cpp
index 9c76597908..91ef23cc3f 100644
--- a/modules/imgcodecs/src/grfmt_bmp.cpp
+++ b/modules/imgcodecs/src/grfmt_bmp.cpp
@@ -544,6 +544,11 @@ decode_rle8_bad: ;
         throw;
     }
 
+    if (m_use_rgb && color && img.channels() == 3)
+    {
+        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+    }
+
     return result;
 }
 
diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp
index 65a0e5e03b..d48e067edd 100644
--- a/modules/imgcodecs/src/grfmt_exr.cpp
+++ b/modules/imgcodecs/src/grfmt_exr.cpp
@@ -373,18 +373,35 @@ bool  ExrDecoder::readData( Mat& img )
 
         if( m_iscolor )
         {
-            if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
-                UpSample( data, channelstoread, step / xstep, m_blue->xSampling, m_blue->ySampling );
-            if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
-                UpSample( data + xstep, channelstoread, step / xstep, m_green->xSampling, m_green->ySampling );
-            if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
-                UpSample( data + 2 * xstep, channelstoread, step / xstep, m_red->xSampling, m_red->ySampling );
+            if (m_use_rgb)
+            {
+                if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
+                    UpSample( data, channelstoread, step / xstep, m_red->xSampling, m_red->ySampling );
+                if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
+                    UpSample( data + xstep, channelstoread, step / xstep, m_green->xSampling, m_green->ySampling );
+                if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
+                    UpSample( data + 2 * xstep, channelstoread, step / xstep, m_blue->xSampling, m_blue->ySampling );
+            }
+            else
+            {
+                if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
+                    UpSample( data, channelstoread, step / xstep, m_blue->xSampling, m_blue->ySampling );
+                if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
+                    UpSample( data + xstep, channelstoread, step / xstep, m_green->xSampling, m_green->ySampling );
+                if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
+                    UpSample( data + 2 * xstep, channelstoread, step / xstep, m_red->xSampling, m_red->ySampling );
+            }
         }
         else if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
             UpSample( data, channelstoread, step / xstep, m_green->xSampling, m_green->ySampling );
 
         if( chromatorgb )
-            ChromaToBGR( (float *)data, m_height, channelstoread, step / xstep );
+        {
+            if (m_use_rgb)
+                ChromaToRGB( (float *)data, m_height, channelstoread, step / xstep );
+            else
+                ChromaToBGR( (float *)data, m_height, channelstoread, step / xstep );
+        }
     }
     else
     {
@@ -406,7 +423,12 @@ bool  ExrDecoder::readData( Mat& img )
             else
             {
                 if( chromatorgb )
-                    ChromaToBGR( (float *)buffer, 1, defaultchannels, step );
+                {
+                    if (m_use_rgb)
+                        ChromaToRGB( (float *)buffer, 1, defaultchannels, step );
+                    else
+                        ChromaToBGR( (float *)buffer, 1, defaultchannels, step );
+                }
 
                 if( m_type == FLOAT )
                 {
@@ -430,12 +452,24 @@ bool  ExrDecoder::readData( Mat& img )
         }
         if( color )
         {
-            if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
-                UpSampleY( data, defaultchannels, step / xstep, m_blue->ySampling );
-            if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
-                UpSampleY( data + xstep, defaultchannels, step / xstep, m_green->ySampling );
-            if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
-                UpSampleY( data + 2 * xstep, defaultchannels, step / xstep, m_red->ySampling );
+            if (m_use_rgb)
+            {
+                if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
+                    UpSampleY( data, defaultchannels, step / xstep, m_red->ySampling );
+                if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
+                    UpSampleY( data + xstep, defaultchannels, step / xstep, m_green->ySampling );
+                if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
+                    UpSampleY( data + 2 * xstep, defaultchannels, step / xstep, m_blue->ySampling );
+            }
+            else
+            {
+                if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) )
+                    UpSampleY( data, defaultchannels, step / xstep, m_blue->ySampling );
+                if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
+                    UpSampleY( data + xstep, defaultchannels, step / xstep, m_green->ySampling );
+                if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) )
+                    UpSampleY( data + 2 * xstep, defaultchannels, step / xstep, m_red->ySampling );
+            }
         }
         else if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) )
             UpSampleY( data, 1, step / xstep, m_green->ySampling );
@@ -558,6 +592,47 @@ void  ExrDecoder::ChromaToBGR( float *data, int numlines, int xstep, int ystep )
     }
 }
 
+void  ExrDecoder::ChromaToRGB(float *data, int numlines, int xstep, int ystep)
+{
+    for( int y = 0; y < numlines; y++ )
+    {
+        for( int x = 0; x < m_width; x++ )
+        {
+            double b, Y, r;
+            if( m_type == FLOAT )
+            {
+                b = data[y * ystep + x * xstep];
+                Y = data[y * ystep + x * xstep + 1];
+                r = data[y * ystep + x * xstep + 2];
+            }
+            else
+            {
+                b = ((unsigned *)data)[y * ystep + x * xstep];
+                Y = ((unsigned *)data)[y * ystep + x * xstep + 1];
+                r = ((unsigned *)data)[y * ystep + x * xstep + 2];
+            }
+            r = (r + 1) * Y;
+            b = (b + 1) * Y;
+            Y = (Y - b * m_chroma.blue[1] - r * m_chroma.red[1]) / m_chroma.green[1];
+
+            if( m_type == FLOAT )
+            {
+                data[y * ystep + x * xstep] = (float)r;
+                data[y * ystep + x * xstep + 1] = (float)Y;
+                data[y * ystep + x * xstep + 2] = (float)b;
+            }
+            else
+            {
+                int t = cvRound(r);
+                ((unsigned *)data)[y * ystep + x * xstep + 0] = (unsigned)MAX(t, 0);
+                t = cvRound(Y);
+                ((unsigned *)data)[y * ystep + x * xstep + 1] = (unsigned)MAX(t, 0);
+                t = cvRound(b);
+                ((unsigned *)data)[y * ystep + x * xstep + 2] = (unsigned)MAX(t, 0);
+            }
+        }
+    }
+}
 
 /**
 // convert one row to gray
diff --git a/modules/imgcodecs/src/grfmt_exr.hpp b/modules/imgcodecs/src/grfmt_exr.hpp
index a86874d228..48ca09acd8 100644
--- a/modules/imgcodecs/src/grfmt_exr.hpp
+++ b/modules/imgcodecs/src/grfmt_exr.hpp
@@ -83,6 +83,7 @@ protected:
     void  UpSampleX( float *data, int xstep, int xsample );
     void  UpSampleY( uchar *data, int xstep, int ystep, int ysample );
     void  ChromaToBGR( float *data, int numlines, int xstep, int ystep );
+    void  ChromaToRGB( float *data, int numlines, int xstep, int ystep );
     void  RGBToGray( float *in, float *out );
 
     InputFile      *m_file;
diff --git a/modules/imgcodecs/src/grfmt_gdal.cpp b/modules/imgcodecs/src/grfmt_gdal.cpp
index 17581b576a..ff059338cf 100644
--- a/modules/imgcodecs/src/grfmt_gdal.cpp
+++ b/modules/imgcodecs/src/grfmt_gdal.cpp
@@ -397,13 +397,13 @@ bool GdalDecoder::readData( Mat& img ){
         case GCI_PaletteIndex:
         case GCI_GrayIndex:
         case GCI_BlueBand:
-            color = 0;
+            color = m_use_rgb ? 2 : 0;
             break;
         case GCI_GreenBand:
             color = 1;
             break;
         case GCI_RedBand:
-            color = 2;
+            color = m_use_rgb ? 0 : 2;
             break;
         case GCI_AlphaBand:
             color = 3;
diff --git a/modules/imgcodecs/src/grfmt_hdr.cpp b/modules/imgcodecs/src/grfmt_hdr.cpp
index c9fec94aa3..3039626121 100644
--- a/modules/imgcodecs/src/grfmt_hdr.cpp
+++ b/modules/imgcodecs/src/grfmt_hdr.cpp
@@ -106,7 +106,13 @@ bool HdrDecoder::readData(Mat& _img)
     switch (_img.channels())
     {
         case 1: cvtColor(img, _img, COLOR_BGR2GRAY); break;
-        case 3: img.copyTo(_img); break;
+        case 3:
+        // TODO, try to modify RGBE_ReadPixels_RLE to load rgb data directly.
+        if (m_use_rgb)
+            cv::cvtColor(img, _img, cv::COLOR_BGR2RGB);
+        else
+            img.copyTo(_img);
+        break;
         default: CV_Error(Error::StsError, "Wrong expected image channels, allowed: 1 and 3");
     }
     return true;
diff --git a/modules/imgcodecs/src/grfmt_jpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg.cpp
index 4e3b1df48d..98019cc48a 100644
--- a/modules/imgcodecs/src/grfmt_jpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg.cpp
@@ -437,13 +437,13 @@ bool  JpegDecoder::readData( Mat& img )
                 if( cinfo->num_components != 4 )
                 {
 #ifdef JCS_EXTENSIONS
-                    cinfo->out_color_space = JCS_EXT_BGR;
+                    cinfo->out_color_space = m_use_rgb ? JCS_EXT_RGB : JCS_EXT_BGR;
                     cinfo->out_color_components = 3;
                     doDirectRead = true; // BGR -> BGR
 #else
                     cinfo->out_color_space = JCS_RGB;
                     cinfo->out_color_components = 3;
-                    doDirectRead = false; // RGB -> BGR
+                    doDirectRead = m_use_rgb ? true : false; // RGB -> BGR
 #endif
                 }
                 else
@@ -514,10 +514,20 @@ bool  JpegDecoder::readData( Mat& img )
 
                     if( color )
                     {
-                        if( cinfo->out_color_components == 3 )
-                            icvCvt_RGB2BGR_8u_C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                        if (m_use_rgb)
+                        {
+                            if( cinfo->out_color_components == 3 )
+                                icvCvt_BGR2RGB_8u_C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                            else
+                                icvCvt_CMYK2RGB_8u_C4C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                        }
                         else
-                            icvCvt_CMYK2BGR_8u_C4C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                        {
+                            if( cinfo->out_color_components == 3 )
+                                icvCvt_RGB2BGR_8u_C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                            else
+                                icvCvt_CMYK2BGR_8u_C4C3R( buffer[0], 0, data, 0, Size(m_width,1) );
+                        }
                     }
                     else
                     {
diff --git a/modules/imgcodecs/src/grfmt_jpeg2000.cpp b/modules/imgcodecs/src/grfmt_jpeg2000.cpp
index 0f80d89c8d..9b8680ac1f 100644
--- a/modules/imgcodecs/src/grfmt_jpeg2000.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg2000.cpp
@@ -286,11 +286,12 @@ bool  Jpeg2KDecoder::readData( Mat& img )
         {
             int ncmpts;
             int cmptlut[3];
+            int swap_rb = m_use_rgb ? 0 : 2;
             if( color )
             {
-                cmptlut[0] = jas_image_getcmptbytype( image, JAS_IMAGE_CT_RGB_B );
-                cmptlut[1] = jas_image_getcmptbytype( image, JAS_IMAGE_CT_RGB_G );
-                cmptlut[2] = jas_image_getcmptbytype( image, JAS_IMAGE_CT_RGB_R );
+                cmptlut[0] = jas_image_getcmptbytype( image, swap_rb );
+                cmptlut[1] = jas_image_getcmptbytype( image, 1 );
+                cmptlut[2] = jas_image_getcmptbytype( image, swap_rb^2 );
                 if( cmptlut[0] < 0 || cmptlut[1] < 0 || cmptlut[2] < 0 )
                     result = false;
                 ncmpts = 3;
diff --git a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
index c5b1a292cc..c05c0bca0e 100644
--- a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp
@@ -350,7 +350,7 @@ opj_cparameters setupEncoderParameters(const std::vector<int>& params)
     return parameters;
 }
 
-bool decodeSRGBData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
+bool decodeSRGBData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift, bool use_rgb)
 {
     using ImageComponents = std::vector<const OPJ_INT32*>;
 
@@ -377,8 +377,9 @@ bool decodeSRGBData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
 
     if (inChannels >= 3)
     {
+        int swap_rb = use_rgb ? 0 : 2;
         // Assume RGB (+ alpha) for 3 channels -> BGR
-        ImageComponents incomps { inImg.comps[2].data, inImg.comps[1].data, inImg.comps[0].data };
+        ImageComponents incomps { inImg.comps[swap_rb].data, inImg.comps[1].data, inImg.comps[swap_rb^2].data };
         // Assume RGBA for 4 channels -> BGRA
         if (outChannels > 3)
         {
@@ -393,7 +394,7 @@ bool decodeSRGBData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
     return false;
 }
 
-bool decodeGrayscaleData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
+bool decodeGrayscaleData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift, bool)
 {
     using ImageComponents = std::vector<const OPJ_INT32*>;
 
@@ -411,7 +412,7 @@ bool decodeGrayscaleData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shif
     return false;
 }
 
-bool decodeSYCCData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
+bool decodeSYCCData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift, bool use_rgb)
 {
     using ImageComponents = std::vector<const OPJ_INT32*>;
 
@@ -426,7 +427,10 @@ bool decodeSYCCData(const opj_image_t& inImg, cv::Mat& outImg, uint8_t shift)
     if (outChannels == 3 && inChannels >= 3) {
         copyToMat(ImageComponents { inImg.comps[0].data, inImg.comps[1].data, inImg.comps[2].data },
                   outImg, shift);
-        cvtColor(outImg, outImg, COLOR_YUV2BGR);
+        if (use_rgb)
+            cvtColor(outImg, outImg, COLOR_YUV2RGB);
+        else
+            cvtColor(outImg, outImg, COLOR_YUV2BGR);
         return true;
     }
 
@@ -585,7 +589,7 @@ bool Jpeg2KOpjDecoderBase::readHeader()
 
 bool Jpeg2KOpjDecoderBase::readData( Mat& img )
 {
-    using DecodeFunc = bool(*)(const opj_image_t&, cv::Mat&, uint8_t shift);
+    using DecodeFunc = bool(*)(const opj_image_t&, cv::Mat&, uint8_t shift, bool use_rgb);
 
     if (!opj_decode(codec_.get(), stream_.get(), image_.get()))
     {
@@ -647,7 +651,7 @@ bool Jpeg2KOpjDecoderBase::readData( Mat& img )
         CV_Assert(comp.data && "OpenJPEG2000: missing component data (unsupported / broken input)");
     }
 
-    return decode(*image_, img, shift);
+    return decode(*image_, img, shift, m_use_rgb);
 }
 
 } // namespace detail
diff --git a/modules/imgcodecs/src/grfmt_pam.cpp b/modules/imgcodecs/src/grfmt_pam.cpp
index 979a31ca91..2c15ab244c 100644
--- a/modules/imgcodecs/src/grfmt_pam.cpp
+++ b/modules/imgcodecs/src/grfmt_pam.cpp
@@ -90,7 +90,7 @@ const static struct pam_header_field fields[] = {
 #define PAM_FIELDS_NO (sizeof (fields) / sizeof ((fields)[0]))
 
 typedef bool (*cvtFunc) (void *src, void *target, int width, int target_channels,
-    int target_depth);
+    int target_depth, bool use_rgb);
 
 struct channel_layout {
     uint rchan, gchan, bchan, graychan;
@@ -108,7 +108,7 @@ struct pam_format {
 };
 
 static bool rgb_convert (void *src, void *target, int width, int target_channels,
-    int target_depth);
+    int target_depth, bool use_rgb);
 
 const static struct pam_format formats[] = {
     {IMWRITE_PAM_FORMAT_NULL, "", NULL, {0, 0, 0, 0} },
@@ -125,19 +125,25 @@ const static struct pam_format formats[] = {
  */
 
 static bool
-rgb_convert (void *src, void *target, int width, int target_channels, int target_depth)
+rgb_convert (void *src, void *target, int width, int target_channels, int target_depth, bool use_rgb)
 {
     bool ret = false;
     if (target_channels == 3) {
         switch (target_depth) {
             case CV_8U:
-                icvCvt_RGB2BGR_8u_C3R( (uchar*) src, 0, (uchar*) target, 0,
-                    Size(width,1) );
+                if (use_rgb)
+                    memcpy(target, src, sizeof(uchar) * width);
+                else
+                    icvCvt_RGB2BGR_8u_C3R( (uchar*) src, 0, (uchar*) target, 0,
+                                           Size(width,1) );
                 ret = true;
                 break;
             case CV_16U:
-                icvCvt_RGB2BGR_16u_C3R( (ushort *)src, 0, (ushort *)target, 0,
-                    Size(width,1) );
+                if (use_rgb)
+                    memcpy(target, src, sizeof(ushort) * width);
+                else
+                    icvCvt_RGB2BGR_16u_C3R( (ushort *)src, 0, (ushort *)target, 0,
+                                            Size(width,1) );
                 ret = true;
                 break;
             default:
@@ -169,7 +175,7 @@ rgb_convert (void *src, void *target, int width, int target_channels, int target
 
 static void
 basic_conversion (void *src, const struct channel_layout *layout, int src_sampe_size,
-    int src_width, void *target, int target_channels, int target_depth)
+    int src_width, void *target, int target_channels, int target_depth, bool use_rgb)
 {
     switch (target_depth) {
         case CV_8U:
@@ -182,11 +188,18 @@ basic_conversion (void *src, const struct channel_layout *layout, int src_sampe_
                         d[0] = d[1] = d[2] = s[layout->graychan];
                     break;
                 case 3:
-                    for( ; s < end; d += 3, s += src_sampe_size ) {
-                        d[0] = s[layout->bchan];
-                        d[1] = s[layout->gchan];
-                        d[2] = s[layout->rchan];
-                    }
+                    if (use_rgb)
+                        for( ; s < end; d += 3, s += src_sampe_size ) {
+                            d[0] = s[layout->rchan];
+                            d[1] = s[layout->gchan];
+                            d[2] = s[layout->bchan];
+                        }
+                    else
+                        for( ; s < end; d += 3, s += src_sampe_size ) {
+                            d[0] = s[layout->bchan];
+                            d[1] = s[layout->gchan];
+                            d[2] = s[layout->rchan];
+                        }
                     break;
                 default:
                     CV_Error(Error::StsInternal, "");
@@ -203,11 +216,18 @@ basic_conversion (void *src, const struct channel_layout *layout, int src_sampe_
                         d[0] = d[1] = d[2] = s[layout->graychan];
                     break;
                 case 3:
-                    for( ; s < end; d += 3, s += src_sampe_size ) {
-                        d[0] = s[layout->bchan];
-                        d[1] = s[layout->gchan];
-                        d[2] = s[layout->rchan];
-                    }
+                    if (use_rgb)
+                        for( ; s < end; d += 3, s += src_sampe_size ) {
+                            d[0] = s[layout->rchan];
+                            d[1] = s[layout->gchan];
+                            d[2] = s[layout->bchan];
+                        }
+                    else
+                        for( ; s < end; d += 3, s += src_sampe_size ) {
+                            d[0] = s[layout->bchan];
+                            d[1] = s[layout->gchan];
+                            d[2] = s[layout->rchan];
+                        }
                     break;
                 default:
                     CV_Error(Error::StsInternal, "");
@@ -610,18 +630,18 @@ bool PAMDecoder::readData(Mat& img)
                         bool funcout = false;
                         if (fmt->cvt_func)
                             funcout = fmt->cvt_func (src, data, m_width, target_channels,
-                                img.depth());
+                                img.depth(), m_use_rgb);
                         /* fall back to default if there is no conversion function or it
                          * can't handle the specified characteristics
                          */
                         if (!funcout)
                             basic_conversion (src, &fmt->layout, m_channels,
-                                m_width, data, target_channels, img.depth());
+                                m_width, data, target_channels, img.depth(), m_use_rgb);
 
                     /* default to selecting the first available channels */
                     } else {
                         basic_conversion (src, &layout, m_channels,
-                            m_width, data, target_channels, img.depth());
+                            m_width, data, target_channels, img.depth(), m_use_rgb);
                     }
                 }
             }
diff --git a/modules/imgcodecs/src/grfmt_pfm.cpp b/modules/imgcodecs/src/grfmt_pfm.cpp
index addae34b4f..b213d18fde 100644
--- a/modules/imgcodecs/src/grfmt_pfm.cpp
+++ b/modules/imgcodecs/src/grfmt_pfm.cpp
@@ -142,7 +142,7 @@ bool PFMDecoder::readData(Mat& mat)
     }
   }
 
-  if (buffer.channels() == 3) {
+  if (buffer.channels() == 3 && !m_use_rgb) {
     cv::cvtColor(buffer, buffer, cv::COLOR_BGR2RGB);
   }
 
diff --git a/modules/imgcodecs/src/grfmt_png.cpp b/modules/imgcodecs/src/grfmt_png.cpp
index aca73bd000..726c8b90b7 100644
--- a/modules/imgcodecs/src/grfmt_png.cpp
+++ b/modules/imgcodecs/src/grfmt_png.cpp
@@ -261,7 +261,7 @@ bool  PngDecoder::readData( Mat& img )
                 png_set_gray_1_2_4_to_8( png_ptr );
 #endif
 
-            if( (m_color_type & PNG_COLOR_MASK_COLOR) && color )
+            if( (m_color_type & PNG_COLOR_MASK_COLOR) && color && !m_use_rgb)
                 png_set_bgr( png_ptr ); // convert RGB to BGR
             else if( color )
                 png_set_gray_to_rgb( png_ptr ); // Gray->RGB
diff --git a/modules/imgcodecs/src/grfmt_pxm.cpp b/modules/imgcodecs/src/grfmt_pxm.cpp
index 76290c43de..d2ce60c743 100644
--- a/modules/imgcodecs/src/grfmt_pxm.cpp
+++ b/modules/imgcodecs/src/grfmt_pxm.cpp
@@ -340,7 +340,9 @@ bool PxMDecoder::readData( Mat& img )
                 {
                     if( color )
                     {
-                        if( img.depth() == CV_8U )
+                        if (m_use_rgb)
+                            memcpy(data, src, m_width * CV_ELEM_SIZE(img.type()));
+                        else if( img.depth() == CV_8U )
                             icvCvt_RGB2BGR_8u_C3R( src, 0, data, 0, Size(m_width,1) );
                         else
                             icvCvt_RGB2BGR_16u_C3R( (ushort *)src, 0, (ushort *)data, 0, Size(m_width,1) );
diff --git a/modules/imgcodecs/src/grfmt_spng.cpp b/modules/imgcodecs/src/grfmt_spng.cpp
index fa15bd46c7..59b2decc6e 100644
--- a/modules/imgcodecs/src/grfmt_spng.cpp
+++ b/modules/imgcodecs/src/grfmt_spng.cpp
@@ -381,14 +381,14 @@ bool SPngDecoder::readData(Mat &img)
                                 break;
 
                             ret = spng_decode_row(png_ptr, buffer[row_info.row_num], image_width);
-                            if (ihdr.interlace_method == 0)
+                            if (ihdr.interlace_method == 0 && !m_use_rgb)
                             {
                                 icvCvt_RGBA2BGRA_16u_C4R(reinterpret_cast<const ushort *>(buffer[row_info.row_num]), 0,
                                                          reinterpret_cast<ushort *>(buffer[row_info.row_num]), 0,
                                                          Size(m_width, 1));
                             }
                         } while (ret == SPNG_OK);
-                        if (ihdr.interlace_method)
+                        if (ihdr.interlace_method && !m_use_rgb)
                         {
                             icvCvt_RGBA2BGRA_16u_C4R(reinterpret_cast<const ushort *>(img.data), step * 2, reinterpret_cast<ushort *>(img.data), step * 2, Size(m_width, m_height));
                         }
@@ -402,12 +402,12 @@ bool SPngDecoder::readData(Mat &img)
                                 break;
 
                             ret = spng_decode_row(png_ptr, buffer[row_info.row_num], image_width);
-                            if (ihdr.interlace_method == 0)
+                            if (ihdr.interlace_method == 0 && !m_use_rgb)
                             {
                                 icvCvt_RGBA2BGRA_8u_C4R(buffer[row_info.row_num], 0, buffer[row_info.row_num], 0, Size(m_width, 1));
                             }
                         } while (ret == SPNG_OK);
-                        if (ihdr.interlace_method)
+                        if (ihdr.interlace_method && !m_use_rgb)
                         {
                             icvCvt_RGBA2BGRA_8u_C4R(img.data, step, img.data, step, Size(m_width, m_height));
                         }
@@ -421,13 +421,13 @@ bool SPngDecoder::readData(Mat &img)
                                 break;
 
                             ret = spng_decode_row(png_ptr, buffer[row_info.row_num], image_width);
-                            if (ihdr.interlace_method == 0)
+                            if (ihdr.interlace_method == 0 && !m_use_rgb)
                             {
                                 icvCvt_RGB2BGR_16u_C3R(reinterpret_cast<const ushort *>(buffer[row_info.row_num]), 0,
                                                        reinterpret_cast<ushort *>(buffer[row_info.row_num]), 0, Size(m_width, 1));
                             }
                         } while (ret == SPNG_OK);
-                        if (ihdr.interlace_method)
+                        if (ihdr.interlace_method && !m_use_rgb)
                         {
                             icvCvt_RGB2BGR_16u_C3R(reinterpret_cast<const ushort *>(img.data), step,
                                                    reinterpret_cast<ushort *>(img.data), step, Size(m_width, m_height));
@@ -442,12 +442,12 @@ bool SPngDecoder::readData(Mat &img)
                                 break;
 
                             ret = spng_decode_row(png_ptr, buffer[row_info.row_num], image_width);
-                            if (ihdr.interlace_method == 0)
+                            if (ihdr.interlace_method == 0 && !m_use_rgb)
                             {
                                 icvCvt_RGB2BGR_8u_C3R(buffer[row_info.row_num], 0, buffer[row_info.row_num], 0, Size(m_width, 1));
                             }
                         } while (ret == SPNG_OK);
-                        if (ihdr.interlace_method)
+                        if (ihdr.interlace_method && !m_use_rgb)
                         {
                             icvCvt_RGB2BGR_8u_C3R(img.data, step, img.data, step, Size(m_width, m_height));
                         }
diff --git a/modules/imgcodecs/src/grfmt_sunras.cpp b/modules/imgcodecs/src/grfmt_sunras.cpp
index 31c0286248..798f295376 100644
--- a/modules/imgcodecs/src/grfmt_sunras.cpp
+++ b/modules/imgcodecs/src/grfmt_sunras.cpp
@@ -342,7 +342,7 @@ bad_decoding_end:
 
                 if( color )
                 {
-                    if( m_type == RAS_FORMAT_RGB )
+                    if( m_type == RAS_FORMAT_RGB || m_use_rgb)
                         icvCvt_RGB2BGR_8u_C3R(src, 0, data, 0, Size(m_width,1) );
                     else
                         memcpy(data, src, std::min(step, (size_t)src_pitch));
@@ -365,7 +365,7 @@ bad_decoding_end:
 
                 if( color )
                     icvCvt_BGRA2BGR_8u_C4C3R( src + 4, 0, data, 0, Size(m_width,1),
-                                              m_type == RAS_FORMAT_RGB ? 2 : 0 );
+                                              (m_type == RAS_FORMAT_RGB || m_use_rgb) ? 2 : 0 );
                 else
                     icvCvt_BGRA2Gray_8u_C4C1R( src + 4, 0, data, 0, Size(m_width,1),
                                                m_type == RAS_FORMAT_RGB ? 2 : 0 );
diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp
index 3890df96bd..41607006e2 100644
--- a/modules/imgcodecs/src/grfmt_tiff.cpp
+++ b/modules/imgcodecs/src/grfmt_tiff.cpp
@@ -865,9 +865,14 @@ bool  TiffDecoder::readData( Mat& img )
                                         break;
 
                                     case MAKE_FLAG( 3, 3 ): // RGB to BGR
-                                        icvCvt_BGR2RGB_8u_C3R( bstart, 0,
-                                                img_line_buffer, 0,
-                                                Size(tile_width, 1) );
+                                        if (m_use_rgb)
+                                            memcpy( (void*) img_line_buffer,
+                                                    (void*) bstart,
+                                                    tile_width * sizeof(uchar) );
+                                        else
+                                            icvCvt_BGR2RGB_8u_C3R( bstart, 0,
+                                                    img_line_buffer, 0,
+                                                    Size(tile_width, 1) );
                                         break;
 
                                     case MAKE_FLAG( 4, 1 ): // RGBA to GRAY
@@ -879,7 +884,7 @@ bool  TiffDecoder::readData( Mat& img )
                                     case MAKE_FLAG( 4, 3 ): // RGBA to BGR
                                         icvCvt_BGRA2BGR_8u_C4C3R( bstart, 0,
                                                 img_line_buffer, 0,
-                                                Size(tile_width, 1), 2 );
+                                                Size(tile_width, 1), m_use_rgb ? 0 : 2);
                                         break;
 
                                     case MAKE_FLAG( 4, 4 ): // RGBA to BGRA
@@ -909,7 +914,7 @@ bool  TiffDecoder::readData( Mat& img )
                                         CV_CheckEQ(wanted_channels, 3, "TIFF-8bpp: BGR/BGRA images are supported only");
                                         icvCvt_BGRA2BGR_8u_C4C3R(bstart + i*tile_width0*4, 0,
                                                 img.ptr(img_y + tile_height - i - 1, x), 0,
-                                                Size(tile_width, 1), 2);
+                                                Size(tile_width, 1), m_use_rgb ? 0 : 2);
                                     }
                                 }
                                 else
@@ -972,9 +977,12 @@ bool  TiffDecoder::readData( Mat& img )
                                     else if (ncn == 3)
                                     {
                                         CV_CheckEQ(wanted_channels, 3, "");
-                                        icvCvt_RGB2BGR_16u_C3R(buffer16, 0,
-                                                img.ptr<ushort>(img_y + i, x), 0,
-                                                Size(tile_width, 1));
+                                        if (m_use_rgb)
+                                            memcpy(buffer16, img.ptr<ushort>(img_y + i, x), tile_width * sizeof(ushort));
+                                        else
+                                            icvCvt_RGB2BGR_16u_C3R(buffer16, 0,
+                                                    img.ptr<ushort>(img_y + i, x), 0,
+                                                    Size(tile_width, 1));
                                     }
                                     else if (ncn == 4)
                                     {
@@ -989,7 +997,7 @@ bool  TiffDecoder::readData( Mat& img )
                                             CV_CheckEQ(wanted_channels, 3, "TIFF-16bpp: BGR/BGRA images are supported only");
                                             icvCvt_BGRA2BGR_16u_C4C3R(buffer16, 0,
                                                 img.ptr<ushort>(img_y + i, x), 0,
-                                                Size(tile_width, 1), 2);
+                                                Size(tile_width, 1), m_use_rgb ? 0 : 2);
                                         }
                                     }
                                     else
@@ -1032,7 +1040,7 @@ bool  TiffDecoder::readData( Mat& img )
                             Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), src_buffer);
                             Rect roi_tile(0, 0, tile_width, tile_height);
                             Rect roi_img(x, img_y, tile_width, tile_height);
-                            if (!m_hdr && ncn == 3)
+                            if (!m_hdr && ncn == 3 && !m_use_rgb)
                                 extend_cvtColor(m_tile(roi_tile), img(roi_img), COLOR_RGB2BGR);
                             else if (!m_hdr && ncn == 4)
                                 extend_cvtColor(m_tile(roi_tile), img(roi_img), COLOR_RGBA2BGRA);
@@ -1060,7 +1068,10 @@ bool  TiffDecoder::readData( Mat& img )
     if (m_hdr && depth >= CV_32F)
     {
         CV_Assert(photometric == PHOTOMETRIC_LOGLUV);
-        cvtColor(img, img, COLOR_XYZ2BGR);
+        if (m_use_rgb)
+            cvtColor(img, img, COLOR_XYZ2RGB);
+        else
+            cvtColor(img, img, COLOR_XYZ2BGR);
     }
     return true;
 }
diff --git a/modules/imgcodecs/src/grfmt_webp.cpp b/modules/imgcodecs/src/grfmt_webp.cpp
index 6f5cdfb6ab..ca54effe86 100644
--- a/modules/imgcodecs/src/grfmt_webp.cpp
+++ b/modules/imgcodecs/src/grfmt_webp.cpp
@@ -184,14 +184,22 @@ bool WebPDecoder::readData(Mat &img)
         if (channels == 3)
         {
             CV_CheckTypeEQ(read_img.type(), CV_8UC3, "");
-            res_ptr = WebPDecodeBGRInto(data.ptr(), data.total(), out_data,
-                                        (int)out_data_size, (int)read_img.step);
+            if (m_use_rgb)
+                res_ptr = WebPDecodeRGBInto(data.ptr(), data.total(), out_data,
+                                            (int)out_data_size, (int)read_img.step);
+            else
+                res_ptr = WebPDecodeBGRInto(data.ptr(), data.total(), out_data,
+                                            (int)out_data_size, (int)read_img.step);
         }
         else if (channels == 4)
         {
             CV_CheckTypeEQ(read_img.type(), CV_8UC4, "");
-            res_ptr = WebPDecodeBGRAInto(data.ptr(), data.total(), out_data,
-                                         (int)out_data_size, (int)read_img.step);
+            if (m_use_rgb)
+                res_ptr = WebPDecodeRGBAInto(data.ptr(), data.total(), out_data,
+                                             (int)out_data_size, (int)read_img.step);
+            else
+                res_ptr = WebPDecodeBGRAInto(data.ptr(), data.total(), out_data,
+                                             (int)out_data_size, (int)read_img.step);
         }
 
         if (res_ptr != out_data)
diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp
index ec4760b879..354f2a4b34 100644
--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@@ -88,7 +88,7 @@ static inline int calcType(int type, int flags)
         if( (flags & IMREAD_ANYDEPTH) == 0 )
             type = CV_MAKETYPE(CV_8U, CV_MAT_CN(type));
 
-        if( (flags & IMREAD_COLOR) != 0 ||
+        if( (flags & IMREAD_COLOR) != 0 || (flags & IMREAD_COLOR_RGB) != 0 ||
            ((flags & IMREAD_ANYCOLOR) != 0 && CV_MAT_CN(type) > 1) )
             type = CV_MAKETYPE(CV_MAT_DEPTH(type), 3);
         else
@@ -432,6 +432,12 @@ imread_( const String& filename, int flags, OutputArray mat )
             scale_denom = 8;
     }
 
+    // Try to decode image by RGB instead of BGR.
+    if (flags & IMREAD_COLOR_RGB && flags != IMREAD_UNCHANGED)
+    {
+        decoder->setRGB(true);
+    }
+
     /// set the scale_denom in the driver
     decoder->setScale( scale_denom );
 
@@ -542,6 +548,9 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats, int star
         count = std::numeric_limits<int>::max();
     }
 
+    if (flags & IMREAD_COLOR_RGB && flags != IMREAD_UNCHANGED)
+        decoder->setRGB(true);
+
     /// set the filename in the driver
     decoder->setSource(filename);
 
@@ -829,6 +838,12 @@ imdecode_( const Mat& buf, int flags, Mat& mat )
             scale_denom = 8;
     }
 
+    // Try to decode image by RGB instead of BGR.
+    if (flags & IMREAD_COLOR_RGB && flags != IMREAD_UNCHANGED)
+    {
+        decoder->setRGB(true);
+    }
+
     /// set the scale_denom in the driver
     decoder->setScale( scale_denom );
 
@@ -965,6 +980,12 @@ imdecodemulti_(const Mat& buf, int flags, std::vector<Mat>& mats, int start, int
     if (!decoder)
         return 0;
 
+    // Try to decode image by RGB instead of BGR.
+    if (flags & IMREAD_COLOR_RGB && flags != IMREAD_UNCHANGED)
+    {
+        decoder->setRGB(true);
+    }
+
     if (count < 0) {
         count = std::numeric_limits<int>::max();
     }
diff --git a/modules/imgcodecs/src/utils.cpp b/modules/imgcodecs/src/utils.cpp
index 0962ebea62..41fd9f5041 100644
--- a/modules/imgcodecs/src/utils.cpp
+++ b/modules/imgcodecs/src/utils.cpp
@@ -352,6 +352,25 @@ void icvCvt_CMYK2BGR_8u_C4C3R( const uchar* cmyk, int cmyk_step,
     }
 }
 
+void icvCvt_CMYK2RGB_8u_C4C3R( const uchar* cmyk, int cmyk_step,
+                               uchar* rgb, int rgb_step, Size size )
+{
+    int i;
+    for( ; size.height--; )
+    {
+        for( i = 0; i < size.width; i++, rgb += 3, cmyk += 4 )
+        {
+            int c = cmyk[0], m = cmyk[1], y = cmyk[2], k = cmyk[3];
+            c = k - ((255 - c)*k>>8);
+            m = k - ((255 - m)*k>>8);
+            y = k - ((255 - y)*k>>8);
+            rgb[0] = (uchar)c; rgb[1] = (uchar)m; rgb[2] = (uchar)y;
+        }
+        rgb += rgb_step - size.width*3;
+        cmyk += cmyk_step - size.width*4;
+    }
+}
+
 
 void icvCvt_CMYK2Gray_8u_C4C1R( const uchar* cmyk, int cmyk_step,
                                 uchar* gray, int gray_step, Size size )
diff --git a/modules/imgcodecs/src/utils.hpp b/modules/imgcodecs/src/utils.hpp
index 43eb907f76..2a9451422e 100644
--- a/modules/imgcodecs/src/utils.hpp
+++ b/modules/imgcodecs/src/utils.hpp
@@ -115,6 +115,8 @@ void icvCvt_BGR5652BGR_8u_C2C3R( const uchar* bgr565, int bgr565_step,
                                  uchar* bgr, int bgr_step, Size size );
 void icvCvt_CMYK2BGR_8u_C4C3R( const uchar* cmyk, int cmyk_step,
                                uchar* bgr, int bgr_step, Size size );
+void icvCvt_CMYK2RGB_8u_C4C3R( const uchar* cmyk, int cmyk_step,
+                               uchar* rgb, int rgb_step, Size size );
 void icvCvt_CMYK2Gray_8u_C4C1R( const uchar* ycck, int ycck_step,
                                 uchar* gray, int gray_step, Size size );
 
diff --git a/modules/imgcodecs/test/test_avif.cpp b/modules/imgcodecs/test/test_avif.cpp
index 72b7f54fea..227c69556d 100644
--- a/modules/imgcodecs/test/test_avif.cpp
+++ b/modules/imgcodecs/test/test_avif.cpp
@@ -150,7 +150,7 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::ValuesIn({1, 3, 4}),
                        ::testing::ValuesIn({0, 50, 100}),
                        ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
-                                            IMREAD_COLOR})));
+                                            IMREAD_COLOR, IMREAD_COLOR_RGB})));
 
 class Imgcodecs_Avif_Image_EncodeDecodeSuite
     : public Imgcodecs_Avif_Image_RoundTripSuite {};
@@ -183,7 +183,7 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::ValuesIn({1, 3, 4}),
                        ::testing::ValuesIn({0, 50, 100}),
                        ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
-                                            IMREAD_COLOR})));
+                                            IMREAD_COLOR, IMREAD_COLOR_RGB})));
 
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -311,7 +311,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Combine(::testing::ValuesIn({8, 10, 12}),
                        ::testing::ValuesIn({1, 3}), ::testing::ValuesIn({50}),
                        ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
-                                            IMREAD_COLOR})));
+                                            IMREAD_COLOR, IMREAD_COLOR_RGB})));
 class Imgcodecs_Avif_Animation_WriteDecodeSuite
     : public Imgcodecs_Avif_Animation_RoundTripSuite {};
 
@@ -347,7 +347,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Combine(::testing::ValuesIn({8, 10, 12}),
                        ::testing::ValuesIn({1, 3}), ::testing::ValuesIn({50}),
                        ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
-                                            IMREAD_COLOR})));
+                                            IMREAD_COLOR, IMREAD_COLOR_RGB})));
 
 }  // namespace
 }  // namespace opencv_test
diff --git a/modules/imgcodecs/test/test_exr.impl.hpp b/modules/imgcodecs/test/test_exr.impl.hpp
index 2418d9d817..c8cda11a63 100644
--- a/modules/imgcodecs/test/test_exr.impl.hpp
+++ b/modules/imgcodecs/test/test_exr.impl.hpp
@@ -192,6 +192,15 @@ TEST(Imgcodecs_EXR, read_YC_changeDepth)
     ASSERT_FALSE(img.empty());
     ASSERT_EQ(CV_8UC3, img.type());
 
+    const Mat img_rgb = cv::imread(filenameInput, IMREAD_COLOR_RGB);
+
+    ASSERT_FALSE(img_rgb.empty());
+    ASSERT_EQ(CV_8UC3, img_rgb.type());
+
+    cvtColor(img_rgb, img_rgb, COLOR_RGB2BGR);
+
+    EXPECT_TRUE(cvtest::norm(img, img_rgb, NORM_INF) == 0);
+
     // Cannot test writing, EXR encoder doesn't support 8U depth
 }
 
diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp
index 1e0bf47b47..947e560c81 100644
--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@@ -108,6 +108,7 @@ const int basic_modes[] =
     IMREAD_UNCHANGED,
     IMREAD_GRAYSCALE,
     IMREAD_COLOR,
+    IMREAD_COLOR_RGB,
     IMREAD_ANYDEPTH,
     IMREAD_ANYCOLOR
 };
@@ -356,6 +357,10 @@ TEST(Imgcodecs_Bmp, rgba_scale)
     ASSERT_FALSE(img.empty());
     ASSERT_EQ(CV_8UC3, img.type());
 
+    img = cv::imread(filenameInput, IMREAD_COLOR_RGB);
+    ASSERT_FALSE(img.empty());
+    ASSERT_EQ(CV_8UC3, img.type());
+
     data = img.ptr();
     ASSERT_EQ(data[0], 255);
     ASSERT_EQ(data[1], 255);
diff --git a/modules/imgcodecs/test/test_jpeg.cpp b/modules/imgcodecs/test/test_jpeg.cpp
index ee9da01aa7..503848068a 100644
--- a/modules/imgcodecs/test/test_jpeg.cpp
+++ b/modules/imgcodecs/test/test_jpeg.cpp
@@ -217,6 +217,7 @@ TEST_P(Imgcodecs_Jpeg_decode_cmyk, regression25274)
 INSTANTIATE_TEST_CASE_P( /* nothing */,
                         Imgcodecs_Jpeg_decode_cmyk,
                         testing::Values(cv::IMREAD_COLOR,
+                                        cv::IMREAD_COLOR_RGB,
                                         cv::IMREAD_GRAYSCALE,
                                         cv::IMREAD_ANYCOLOR));
 
@@ -327,6 +328,13 @@ TEST_P(Imgcodecs_Jpeg_encode_withLumaChromaQuality, basic)
     cv::Mat src = imread(fname, cv::IMREAD_COLOR);
     ASSERT_FALSE(src.empty());
 
+    // Add imread RGB test
+    cv::Mat src_rgb = imread(fname, cv::IMREAD_COLOR_RGB);
+    ASSERT_FALSE(src_rgb.empty());
+
+    cvtColor(src_rgb, src_rgb, COLOR_RGB2BGR);
+    EXPECT_TRUE(cvtest::norm(src, src_rgb, NORM_INF) == 0);
+
     std::vector<uint8_t> jpegNormal;
     ASSERT_NO_THROW(cv::imencode(".jpg", src, jpegNormal));
 
diff --git a/modules/imgcodecs/test/test_png.cpp b/modules/imgcodecs/test/test_png.cpp
index cdc7da39b2..13aca2e396 100644
--- a/modules/imgcodecs/test/test_png.cpp
+++ b/modules/imgcodecs/test/test_png.cpp
@@ -83,6 +83,14 @@ TEST(Imgcodecs_Png, read_color_palette_with_alpha)
     EXPECT_EQ(img.at<Vec3b>(0, 0), Vec3b(0, 0, 255));
     EXPECT_EQ(img.at<Vec3b>(0, 1), Vec3b(0, 0, 255));
 
+    img = imread(root + "readwrite/color_palette_alpha.png", IMREAD_COLOR_RGB);
+    ASSERT_FALSE(img.empty());
+    ASSERT_TRUE(img.channels() == 3);
+
+    // pixel is red in RGB
+    EXPECT_EQ(img.at<Vec3b>(0, 0), Vec3b(255, 0, 0));
+    EXPECT_EQ(img.at<Vec3b>(0, 1), Vec3b(255, 0, 0));
+
     // Fourth Test : Read PNG without alpha, imread flag 1
     img = imread(root + "readwrite/color_palette_no_alpha.png", IMREAD_COLOR);
     ASSERT_FALSE(img.empty());
@@ -91,6 +99,14 @@ TEST(Imgcodecs_Png, read_color_palette_with_alpha)
     // pixel is red in BGR
     EXPECT_EQ(img.at<Vec3b>(0, 0), Vec3b(0, 0, 255));
     EXPECT_EQ(img.at<Vec3b>(0, 1), Vec3b(0, 0, 255));
+
+    img = imread(root + "readwrite/color_palette_no_alpha.png", IMREAD_COLOR_RGB);
+    ASSERT_FALSE(img.empty());
+    ASSERT_TRUE(img.channels() == 3);
+
+    // pixel is red in RGB
+    EXPECT_EQ(img.at<Vec3b>(0, 0), Vec3b(255, 0, 0));
+    EXPECT_EQ(img.at<Vec3b>(0, 1), Vec3b(255, 0, 0));
 }
 
 /**
diff --git a/modules/imgcodecs/test/test_precomp.hpp b/modules/imgcodecs/test/test_precomp.hpp
index 3bd4221dae..fab6606859 100644
--- a/modules/imgcodecs/test/test_precomp.hpp
+++ b/modules/imgcodecs/test/test_precomp.hpp
@@ -51,6 +51,11 @@ void PrintTo(const ImreadModes& val, std::ostream* os)
         v &= ~IMREAD_IGNORE_ORIENTATION;
         *os << "IMREAD_IGNORE_ORIENTATION" << (v == 0 ? "" : " | ");
     }
+    if ((v & IMREAD_COLOR_RGB) != 0)
+    {
+        v &= ~IMREAD_COLOR_RGB;
+        *os << "IMREAD_COLOR_RGB" << (v == 0 ? "" : " | ");
+    }
     switch (v)
     {
         case IMREAD_UNCHANGED: return;
@@ -66,6 +71,7 @@ void PrintTo(const ImreadModes& val, std::ostream* os)
         case IMREAD_REDUCED_GRAYSCALE_8: // fallthru
         case IMREAD_REDUCED_COLOR_8: *os << "REDUCED_8"; return;
         case IMREAD_IGNORE_ORIENTATION: return;
+        case IMREAD_COLOR_RGB: return;
     } // don't use "default:" to emit compiler warnings
     *os << "IMREAD_UNKNOWN(" << (int)v << ")";
 }
diff --git a/modules/imgcodecs/test/test_read_write.cpp b/modules/imgcodecs/test/test_read_write.cpp
index 824688b366..255f819a9a 100644
--- a/modules/imgcodecs/test/test_read_write.cpp
+++ b/modules/imgcodecs/test/test_read_write.cpp
@@ -196,9 +196,19 @@ void test_image_io(const Mat& image, const std::string& fname, const std::string
     Mat buf_loaded = imdecode(Mat(buf), imreadFlag);
     EXPECT_FALSE(buf_loaded.empty());
 
+    if (imreadFlag & IMREAD_COLOR_RGB && imreadFlag != -1)
+    {
+        cvtColor(buf_loaded, buf_loaded, COLOR_RGB2BGR);
+    }
+
     Mat loaded = imread(fname, imreadFlag);
     EXPECT_FALSE(loaded.empty());
 
+    if (imreadFlag & IMREAD_COLOR_RGB && imreadFlag != -1)
+    {
+        cvtColor(loaded, loaded, COLOR_RGB2BGR);
+    }
+
     EXPECT_EQ(0, cv::norm(loaded, buf_loaded, NORM_INF)) << "imread() and imdecode() calls must provide the same result (bit-exact)";
 
     double psnr = cvtest::PSNR(loaded, image);
@@ -238,6 +248,7 @@ TEST_P(Imgcodecs_Image, read_write_BGR)
 
     Mat image = generateTestImageBGR();
     EXPECT_NO_THROW(test_image_io(image, fname, ext, IMREAD_COLOR, psnrThreshold));
+    EXPECT_NO_THROW(test_image_io(image, fname, ext, IMREAD_COLOR_RGB, psnrThreshold));
 
     EXPECT_EQ(0, remove(fname.c_str()));
 }
diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp
index ee40c54b59..fb607bf18f 100644
--- a/modules/imgcodecs/test/test_tiff.cpp
+++ b/modules/imgcodecs/test/test_tiff.cpp
@@ -53,7 +53,7 @@ enum ImreadMixModes
 {
     IMREAD_MIX_UNCHANGED                   = IMREAD_UNCHANGED                                     ,
     IMREAD_MIX_GRAYSCALE                   = IMREAD_GRAYSCALE                                     ,
-    IMREAD_MIX_COLOR                       = IMREAD_COLOR                                         ,
+    IMREAD_MIX_COLOR                       = IMREAD_COLOR     | IMREAD_COLOR_RGB                  ,
     IMREAD_MIX_GRAYSCALE_ANYDEPTH          = IMREAD_GRAYSCALE | IMREAD_ANYDEPTH                   ,
     IMREAD_MIX_GRAYSCALE_ANYCOLOR          = IMREAD_GRAYSCALE                    | IMREAD_ANYCOLOR,
     IMREAD_MIX_GRAYSCALE_ANYDEPTH_ANYCOLOR = IMREAD_GRAYSCALE | IMREAD_ANYDEPTH  | IMREAD_ANYCOLOR,
@@ -125,7 +125,7 @@ TEST_P(Imgcodecs_Tiff_decode_Huge, regression)
             case IMREAD_GRAYSCALE | IMREAD_ANYCOLOR | IMREAD_ANYDEPTH:
                 ncn = (ncn == 1)?1:3;
                 break;
-            case IMREAD_COLOR:
+            case IMREAD_COLOR | IMREAD_COLOR_RGB:
                 ncn = 3;
                 depth = 1;
                 break;
@@ -818,6 +818,24 @@ TEST(Imgcodecs_Tiff, read_palette_color_image)
     ASSERT_EQ(CV_8UC3, img.type());
 }
 
+TEST(Imgcodecs_Tiff, read_palette_color_image_rgb_and_bgr)
+{
+    const string root = cvtest::TS::ptr()->get_data_path();
+    const string filenameInput = root + "readwrite/test_palette_color_image.tif";
+
+    Mat img_rgb, img_bgr;
+    ASSERT_NO_THROW(img_rgb = cv::imread(filenameInput, IMREAD_COLOR_RGB));
+    ASSERT_NO_THROW(img_bgr = cv::imread(filenameInput, IMREAD_COLOR_BGR));
+    ASSERT_FALSE(img_rgb.empty());
+    ASSERT_EQ(CV_8UC3, img_rgb.type());
+
+    ASSERT_FALSE(img_bgr.empty());
+    ASSERT_EQ(CV_8UC3, img_bgr.type());
+
+    EXPECT_EQ(img_rgb.at<Vec3b>(32, 24), Vec3b(255, 0, 0));
+    EXPECT_EQ(img_bgr.at<Vec3b>(32, 24), Vec3b(0, 0, 255));
+}
+
 TEST(Imgcodecs_Tiff, read_4_bit_palette_color_image)
 {
     const string root = cvtest::TS::ptr()->get_data_path();
@@ -1066,6 +1084,7 @@ const int all_modes[] =
     IMREAD_UNCHANGED,
     IMREAD_GRAYSCALE,
     IMREAD_COLOR,
+    IMREAD_COLOR_RGB,
     IMREAD_ANYDEPTH,
     IMREAD_ANYCOLOR
 };
diff --git a/modules/imgcodecs/test/test_webp.cpp b/modules/imgcodecs/test/test_webp.cpp
index 1f2cad7d89..3647714039 100644
--- a/modules/imgcodecs/test/test_webp.cpp
+++ b/modules/imgcodecs/test/test_webp.cpp
@@ -51,6 +51,12 @@ TEST(Imgcodecs_WebP, encode_decode_lossless_webp)
     ASSERT_FALSE(decode.empty());
     EXPECT_TRUE(cvtest::norm(decode, img_webp, NORM_INF) == 0);
 
+    cv::Mat decode_rgb = cv::imdecode(buf, IMREAD_COLOR_RGB);
+    ASSERT_FALSE(decode_rgb.empty());
+
+    cvtColor(decode_rgb, decode_rgb, COLOR_RGB2BGR);
+    EXPECT_TRUE(cvtest::norm(decode_rgb, img_webp, NORM_INF) == 0);
+
     ASSERT_FALSE(img_webp.empty());
 
     EXPECT_TRUE(cvtest::norm(img, img_webp, NORM_INF) == 0);

From bef6c110a4958406400781e5d7139f6993889a80 Mon Sep 17 00:00:00 2001
From: Wanli <wanli.zhong.1999@gmail.com>
Date: Wed, 3 Jul 2024 15:59:44 +0800
Subject: [PATCH 10/39] Merge pull request #25781 from WanliZhong:v_log

Add support for v_log (Natural Logarithm) #25781

This PR aims to implement `v_log(v_float16 x)`, `v_log(v_float32 x)` and `v_log(v_float64 x)`.
Merged after https://github.com/opencv/opencv/pull/24941

TODO:
- [x] double and half float precision
- [x] tests for them
- [x] doc to explain the implementation

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../include/opencv2/core/hal/intrin_cpp.hpp   |  21 +-
 .../include/opencv2/core/hal/intrin_math.hpp  | 222 ++++++++++++++++++
 modules/core/test/test_intrin_utils.hpp       |  72 ++++++
 3 files changed, 313 insertions(+), 2 deletions(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
index 40979f6c4b..e9e8d28eaa 100644
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@@ -364,6 +364,7 @@ Floating point:
 |extract_n          | x | x |
 |broadcast_element  | x |   |
 |exp                | x | x |
+|log                | x | x |
 
  @{ */
 
@@ -742,13 +743,29 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
 OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
 #define OPENCV_HAL_MATH_HAVE_EXP 1
 
+/**
+ * @brief Natural logarithm \f$ \log(x) \f$ of elements
+ *
+ * Only for floating point types. Core implementation steps:
+ * 1. Decompose Input: Use binary representation to decompose the input into mantissa part \f$ m \f$ and exponent part \f$ e \f$. Such that \f$ \log(x) = \log(m \cdot 2^e) = \log(m) + e \cdot \ln(2) \f$.
+ * 2. Adjust Mantissa and Exponent Parts: If the mantissa is less than \f$ \sqrt{0.5} \f$, adjust the exponent and mantissa to ensure the mantissa is in the range \f$ (\sqrt{0.5}, \sqrt{2}) \f$ for better approximation.
+ * 3. Polynomial Approximation for \f$ \log(m) \f$: The closer the \f$ m \f$ is to 1, the more accurate the result.
+ *    - For float16 and float32, use a Taylor Series with 9 terms.
+ *    - For float64, use Pade Polynomials Approximation with 6 terms.
+ * 4. Combine Results: Add the two parts together to get the final result.
+ *
+ * @note The precision of the calculation depends on the implementation and the data type of the input.
+ *
+ * @note Similar to the behavior of std::log(), \f$ \ln(0) = -\infty \f$.
+ */
+OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+#define OPENCV_HAL_MATH_HAVE_LOG 1
+
 //! @cond IGNORED
 OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
 #define OPENCV_HAL_MATH_HAVE_SIN 1
 OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
 #define OPENCV_HAL_MATH_HAVE_COS 1
-OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
-#define OPENCV_HAL_MATH_HAVE_LOG 1
 //! @endcond
 
 /** @brief Absolute value of elements
diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp
index 528166889b..0f51b9ba13 100644
--- a/modules/core/include/opencv2/core/hal/intrin_math.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp
@@ -194,7 +194,229 @@ namespace CV__SIMD_NAMESPACE {
 
 #define OPENCV_HAL_MATH_HAVE_EXP 1
 //! @}
+#endif
+
+#ifndef OPENCV_HAL_MATH_HAVE_LOG
+
+//! @name Natural Logarithm
+//! @{
+#if defined(CV_SIMD_FP16) && CV_SIMD_FP16
+    inline v_float16 v_log(const v_float16 &x) {
+        const v_float16 _vlog_one_fp16 = vx_setall_f16(1.0f);
+        const v_float16 _vlog_SQRTHF_fp16 = vx_setall_f16(0.707106781186547524f);
+        const v_float16 _vlog_q1_fp16 = vx_setall_f16(-2.12194440E-4f);
+        const v_float16 _vlog_q2_fp16 = vx_setall_f16(0.693359375f);
+        const v_float16 _vlog_p0_fp16 = vx_setall_f16(7.0376836292E-2f);
+        const v_float16 _vlog_p1_fp16 = vx_setall_f16(-1.1514610310E-1f);
+        const v_float16 _vlog_p2_fp16 = vx_setall_f16(1.1676998740E-1f);
+        const v_float16 _vlog_p3_fp16 = vx_setall_f16(-1.2420140846E-1f);
+        const v_float16 _vlog_p4_fp16 = vx_setall_f16(1.4249322787E-1f);
+        const v_float16 _vlog_p5_fp16 = vx_setall_f16(-1.6668057665E-1f);
+        const v_float16 _vlog_p6_fp16 = vx_setall_f16(2.0000714765E-1f);
+        const v_float16 _vlog_p7_fp16 = vx_setall_f16(-2.4999993993E-1f);
+        const v_float16 _vlog_p8_fp16 = vx_setall_f16(3.3333331174E-1f);
+        const v_int16 _vlog_inv_mant_mask_s16 = vx_setall_s16(~0x7c00);
+
+        v_float16 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp;
+        v_int16 _vlog_ux, _vlog_emm0;
+
+        _vlog_ux = v_reinterpret_as_s16(x);
+        _vlog_emm0 = v_shr(_vlog_ux, 10);
+
+        _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s16);
+        _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(vx_setall_f16(0.5f)));
+        _vlog_x = v_reinterpret_as_f16(_vlog_ux);
+
+        _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s16(0xf));
+        _vlog_e = v_cvt_f16(_vlog_emm0);
+
+        _vlog_e = v_add(_vlog_e, _vlog_one_fp16);
+
+        v_float16 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp16);
+        _vlog_tmp = v_and(_vlog_x, _vlog_mask);
+        _vlog_x = v_sub(_vlog_x, _vlog_one_fp16);
+        _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp16, _vlog_mask));
+        _vlog_x = v_add(_vlog_x, _vlog_tmp);
+
+        _vlog_z = v_mul(_vlog_x, _vlog_x);
+
+        _vlog_y = v_fma(_vlog_p0_fp16, _vlog_x, _vlog_p1_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp16);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp16);
+        _vlog_y = v_mul(_vlog_y, _vlog_x);
+        _vlog_y = v_mul(_vlog_y, _vlog_z);
+
+        _vlog_y = v_fma(_vlog_e, _vlog_q1_fp16, _vlog_y);
+
+        _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, vx_setall_f16(0.5f)));
+
+        _vlog_x = v_add(_vlog_x, _vlog_y);
+        _vlog_x = v_fma(_vlog_e, _vlog_q2_fp16, _vlog_x);
+        // log(0) -> -INF
+        v_float16 mask_zero = v_eq(x, vx_setzero_f16());
+        _vlog_x = v_select(mask_zero, v_reinterpret_as_f16(vx_setall_s16(0xfc00)), _vlog_x);
+        // log(NEG), log(NAN) -> NAN
+        v_float16 mask_not_nan = v_ge(x, vx_setzero_f16());
+        _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f16(vx_setall_s16(0x7e00)));
+        // log(INF) -> INF
+        v_float16 mask_inf = v_eq(x, v_reinterpret_as_f16(vx_setall_s16(0x7c00)));
+        _vlog_x = v_select(mask_inf, x, _vlog_x);
+        return _vlog_x;
+    }
+#endif
 
+    inline v_float32 v_log(const v_float32 &x) {
+        const v_float32 _vlog_one_fp32 = vx_setall_f32(1.0f);
+        const v_float32 _vlog_SQRTHF_fp32 = vx_setall_f32(0.707106781186547524f);
+        const v_float32 _vlog_q1_fp32 = vx_setall_f32(-2.12194440E-4f);
+        const v_float32 _vlog_q2_fp32 = vx_setall_f32(0.693359375f);
+        const v_float32 _vlog_p0_fp32 = vx_setall_f32(7.0376836292E-2f);
+        const v_float32 _vlog_p1_fp32 = vx_setall_f32(-1.1514610310E-1f);
+        const v_float32 _vlog_p2_fp32 = vx_setall_f32(1.1676998740E-1f);
+        const v_float32 _vlog_p3_fp32 = vx_setall_f32(-1.2420140846E-1f);
+        const v_float32 _vlog_p4_fp32 = vx_setall_f32(1.4249322787E-1f);
+        const v_float32 _vlog_p5_fp32 = vx_setall_f32(-1.6668057665E-1f);
+        const v_float32 _vlog_p6_fp32 = vx_setall_f32(2.0000714765E-1f);
+        const v_float32 _vlog_p7_fp32 = vx_setall_f32(-2.4999993993E-1f);
+        const v_float32 _vlog_p8_fp32 = vx_setall_f32(3.3333331174E-1f);
+        const v_int32 _vlog_inv_mant_mask_s32 = vx_setall_s32(~0x7f800000);
+
+        v_float32 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp;
+        v_int32 _vlog_ux, _vlog_emm0;
+
+        _vlog_ux = v_reinterpret_as_s32(x);
+        _vlog_emm0 = v_shr(_vlog_ux, 23);
+
+        _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s32);
+        _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s32(vx_setall_f32(0.5f)));
+        _vlog_x = v_reinterpret_as_f32(_vlog_ux);
+
+        _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s32(0x7f));
+        _vlog_e = v_cvt_f32(_vlog_emm0);
+
+        _vlog_e = v_add(_vlog_e, _vlog_one_fp32);
+
+        v_float32 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp32);
+        _vlog_tmp = v_and(_vlog_x, _vlog_mask);
+        _vlog_x = v_sub(_vlog_x, _vlog_one_fp32);
+        _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp32, _vlog_mask));
+        _vlog_x = v_add(_vlog_x, _vlog_tmp);
+
+        _vlog_z = v_mul(_vlog_x, _vlog_x);
+
+        _vlog_y = v_fma(_vlog_p0_fp32, _vlog_x, _vlog_p1_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp32);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp32);
+        _vlog_y = v_mul(_vlog_y, _vlog_x);
+        _vlog_y = v_mul(_vlog_y, _vlog_z);
+
+        _vlog_y = v_fma(_vlog_e, _vlog_q1_fp32, _vlog_y);
+
+        _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, vx_setall_f32(0.5)));
+
+        _vlog_x = v_add(_vlog_x, _vlog_y);
+        _vlog_x = v_fma(_vlog_e, _vlog_q2_fp32, _vlog_x);
+        // log(0) -> -INF
+        v_float32 mask_zero = v_eq(x, vx_setzero_f32());
+        _vlog_x = v_select(mask_zero, v_reinterpret_as_f32(vx_setall_s32(0xff800000)), _vlog_x);
+        // log(NEG), log(NAN) -> NAN
+        v_float32 mask_not_nan = v_ge(x, vx_setzero_f32());
+        _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f32(vx_setall_s32(0x7fc00000)));
+        // log(INF) -> INF
+        v_float32 mask_inf = v_eq(x, v_reinterpret_as_f32(vx_setall_s32(0x7f800000)));
+        _vlog_x = v_select(mask_inf, x, _vlog_x);
+        return _vlog_x;
+    }
+
+#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
+    inline v_float64 v_log(const v_float64 &x) {
+        const v_float64 _vlog_one_fp64 = vx_setall_f64(1.0);
+        const v_float64 _vlog_SQRTHF_fp64 = vx_setall_f64(0.7071067811865475244);
+        const v_float64 _vlog_p0_fp64 = vx_setall_f64(1.01875663804580931796E-4);
+        const v_float64 _vlog_p1_fp64 = vx_setall_f64(4.97494994976747001425E-1);
+        const v_float64 _vlog_p2_fp64 = vx_setall_f64(4.70579119878881725854);
+        const v_float64 _vlog_p3_fp64 = vx_setall_f64(1.44989225341610930846E1);
+        const v_float64 _vlog_p4_fp64 = vx_setall_f64(1.79368678507819816313E1);
+        const v_float64 _vlog_p5_fp64 = vx_setall_f64(7.70838733755885391666);
+        const v_float64 _vlog_q0_fp64 = vx_setall_f64(1.12873587189167450590E1);
+        const v_float64 _vlog_q1_fp64 = vx_setall_f64(4.52279145837532221105E1);
+        const v_float64 _vlog_q2_fp64 = vx_setall_f64(8.29875266912776603211E1);
+        const v_float64 _vlog_q3_fp64 = vx_setall_f64(7.11544750618563894466E1);
+        const v_float64 _vlog_q4_fp64 = vx_setall_f64(2.31251620126765340583E1);
+
+        const v_float64 _vlog_C0_fp64 = vx_setall_f64(2.121944400546905827679e-4);
+        const v_float64 _vlog_C1_fp64 = vx_setall_f64(0.693359375);
+        const v_int64 _vlog_inv_mant_mask_s64 = vx_setall_s64(~0x7ff0000000000000);
+
+        v_float64 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp, _vlog_xx;
+        v_int64 _vlog_ux, _vlog_emm0;
+
+        _vlog_ux = v_reinterpret_as_s64(x);
+        _vlog_emm0 = v_shr(_vlog_ux, 52);
+
+        _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s64);
+        _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s64(vx_setall_f64(0.5)));
+        _vlog_x = v_reinterpret_as_f64(_vlog_ux);
+
+        _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s64(0x3ff));
+        _vlog_e = v_cvt_f64(_vlog_emm0);
+
+        _vlog_e = v_add(_vlog_e, _vlog_one_fp64);
+
+        v_float64 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp64);
+        _vlog_tmp = v_and(_vlog_x, _vlog_mask);
+        _vlog_x = v_sub(_vlog_x, _vlog_one_fp64);
+        _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp64, _vlog_mask));
+        _vlog_x = v_add(_vlog_x, _vlog_tmp);
+
+        _vlog_xx = v_mul(_vlog_x, _vlog_x);
+
+        _vlog_y = v_fma(_vlog_p0_fp64, _vlog_x, _vlog_p1_fp64);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp64);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp64);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp64);
+        _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp64);
+        _vlog_y = v_mul(_vlog_y, _vlog_x);
+        _vlog_y = v_mul(_vlog_y, _vlog_xx);
+
+        _vlog_z = v_add(_vlog_x, _vlog_q0_fp64);
+        _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q1_fp64);
+        _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q2_fp64);
+        _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q3_fp64);
+        _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q4_fp64);
+
+        _vlog_z = v_div(_vlog_y, _vlog_z);
+        _vlog_z = v_sub(_vlog_z, v_mul(_vlog_e, _vlog_C0_fp64));
+        _vlog_z = v_sub(_vlog_z, v_mul(_vlog_xx, vx_setall_f64(0.5)));
+
+        _vlog_z = v_add(_vlog_z, _vlog_x);
+        _vlog_z = v_fma(_vlog_e, _vlog_C1_fp64, _vlog_z);
+
+        // log(0) -> -INF
+        v_float64 mask_zero = v_eq(x, vx_setzero_f64());
+        _vlog_z = v_select(mask_zero, v_reinterpret_as_f64(vx_setall_s64(0xfff0000000000000)), _vlog_z);
+        // log(NEG), log(NAN) -> NAN
+        v_float64 mask_not_nan = v_ge(x, vx_setzero_f64());
+        _vlog_z = v_select(mask_not_nan, _vlog_z, v_reinterpret_as_f64(vx_setall_s64(0x7ff8000000000000)));
+        // log(INF) -> INF
+        v_float64 mask_inf = v_eq(x, v_reinterpret_as_f64(vx_setall_s64(0x7ff0000000000000)));
+        _vlog_z = v_select(mask_inf, x, _vlog_z);
+        return _vlog_z;
+    }
+#endif
+
+#define OPENCV_HAL_MATH_HAVE_LOG 1
+//! @}
 #endif
 }
 #endif  // OPENCV_HAL_INTRIN_HPP
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 6e55a6ddd3..4893e64ba8 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1792,6 +1792,75 @@ template<typename R> struct TheTest
     TheTest &test_exp_fp64() {
 #if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
         __test_exp(709.0, 1e-15, 1e15, DBL_MIN);
+#endif
+        return *this;
+    }
+
+    void __test_log(LaneType expBound, LaneType diff_thr, LaneType flt_min) {
+        int n = VTraits<R>::vlanes();
+        // Test special values
+        std::vector<LaneType> specialValues = {0, 1, (LaneType) M_E, INFINITY, -INFINITY, NAN};
+        const int testRandNum = 10000;
+        const double specialValueProbability = 0.1; // 10% chance to insert a special value
+        cv::RNG_MT19937 rng;
+
+        for (int i = 0; i < testRandNum; i++) {
+            Data<R> dataRand, resRand;
+            for (int j = 0; j < n; ++j) {
+                if (rng.uniform(0.f, 1.f) <= specialValueProbability) {
+                    // Insert a special value
+                    int specialValueIndex = rng.uniform(0, (int) specialValues.size());
+                    dataRand[j] = specialValues[specialValueIndex];
+                } else {
+                    // Generate uniform random data in [-expBound, expBound]
+                    dataRand[j] = (LaneType) std::exp(rng.uniform(-expBound, expBound));
+                }
+            }
+
+            // Compare with std::log
+            R x = dataRand;
+            resRand = v_log(x);
+            for (int j = 0; j < n; ++j) {
+                SCOPED_TRACE(cv::format("Random test value: %f", dataRand[j]));
+                LaneType std_log = std::log(dataRand[j]);
+                if (dataRand[j] == 0) {
+                    // input 0 -> output -INF
+                    EXPECT_TRUE(std::isinf(resRand[j]) && resRand[j] < 0);
+                } else if (dataRand[j] < 0 || std::isnan(dataRand[j])) {
+                    // input less than 0 -> output NAN
+                    // input NaN -> output NaN
+                    EXPECT_TRUE(std::isnan(resRand[j]));
+                } else if (dataRand[j] == 1) {
+                    // input 1 -> output 0
+                    EXPECT_EQ((LaneType) 0, resRand[j]);
+                } else if (std::isinf(dataRand[j]) && dataRand[j] > 0) {
+                    // input INF -> output INF
+                    EXPECT_TRUE(std::isinf(resRand[j]) && resRand[j] > 0);
+                } else {
+                    EXPECT_LT(std::abs(resRand[j] - std_log), diff_thr * (std::abs(std_log) + flt_min * 100));
+                }
+            }
+        }
+    }
+
+    TheTest &test_log_fp16() {
+#if CV_SIMD_FP16
+        float16_t flt16_min;
+        uint16_t flt16_min_hex = 0x0400;
+        std::memcpy(&flt16_min, &flt16_min_hex, sizeof(float16_t));
+        __test_log((float16_t) 9, (float16_t) 1e-3, flt16_min);
+#endif
+        return *this;
+    }
+
+    TheTest &test_log_fp32() {
+        __test_log(25.f, 1e-6f, FLT_MIN);
+        return *this;
+    }
+
+    TheTest &test_log_fp64() {
+#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
+        __test_log(200., 1e-15, DBL_MIN);
 #endif
         return *this;
     }
@@ -2109,6 +2178,7 @@ void test_hal_intrin_float32()
         .test_broadcast_highest()
         .test_pack_triplets()
         .test_exp_fp32()
+        .test_log_fp32()
 #if CV_SIMD_WIDTH == 32
         .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
         .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
@@ -2140,6 +2210,7 @@ void test_hal_intrin_float64()
         .test_extract_n<0>().test_extract_n<1>()
         .test_extract_highest()
         .test_exp_fp64()
+        .test_log_fp64()
         //.test_broadcast_element<0>().test_broadcast_element<1>()
 #if CV_SIMD_WIDTH == 32
         .test_extract<2>().test_extract<3>()
@@ -2161,6 +2232,7 @@ void test_hal_intrin_float16()
         .test_loadstore_fp16()
         .test_float_cvt_fp16()
         .test_exp_fp16()
+        .test_log_fp16()
 #endif
         ;
 #else

From 25fb55601bf3e6bff9559acdbdae78426ff17aa2 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Wed, 3 Jul 2024 12:10:31 +0300
Subject: [PATCH 11/39] Fixed narrowing conversion warning with MSVC compiler.

---
 modules/dnn/test/test_onnx_importer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 35ac0be56a..8855eb6439 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -2800,7 +2800,7 @@ TEST_P(Test_ONNX_nets, YOLOv9)
     float iou_threshold = 0.50;
 
     std::vector<int> refClassIds{1, 16, 2}; // wrong class mapping for yolov9
-    std::vector<float> refScores{0.959274f, 0.901125, 0.559396f};
+    std::vector<float> refScores{0.959274f, 0.901125f, 0.559396f};
 
     std::vector<Rect2d> refBoxes{
         Rect2d(106.255, 107.927, 472.497, 350.309),

From 99c0a96a2ff5ca671268fdafe2db8773fa224af9 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Wed, 3 Jul 2024 13:41:07 +0300
Subject: [PATCH 12/39] Enable KleidiCV for Android aarch64 by default.

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b36482d9ce..30b205ecd8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -261,7 +261,7 @@ OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON
   VERIFY HAVE_CAP_IOS)
 OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" (NOT CV_DISABLE_OPTIMIZATION)
   VISIBLE_IF (ARM OR AARCH64) AND NOT IOS AND NOT XROS)
-OCV_OPTION(WITH_KLEIDICV "Use KleidiCV library for ARM platforms" OFF
+OCV_OPTION(WITH_KLEIDICV "Use KleidiCV library for ARM platforms" (ANDROID AND AARCH64 AND NOT CV_DISABLE_OPTIMIZATION)
   VISIBLE_IF (AARCH64 AND (ANDROID OR UNIX AND NOT IOS AND NOT XROS)))
 OCV_OPTION(WITH_NDSRVP "Use Andes RVP extension" (NOT CV_DISABLE_OPTIMIZATION)
   VISIBLE_IF RISCV)

From e28c6eb3b64a21e0239d556de9dce1620cb9a3bd Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Wed, 3 Jul 2024 19:22:06 +0300
Subject: [PATCH 13/39] Fixed gtkglext search in cmake.

---
 modules/highgui/cmake/detect_gtk.cmake | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/modules/highgui/cmake/detect_gtk.cmake b/modules/highgui/cmake/detect_gtk.cmake
index 5e9412d427..b7f53d1a9b 100644
--- a/modules/highgui/cmake/detect_gtk.cmake
+++ b/modules/highgui/cmake/detect_gtk.cmake
@@ -29,7 +29,15 @@ if(WITH_GTK)
   if((WITH_OPENGL OR HAVE_OPENGL) AND HAVE_GTK2)
     ocv_check_modules(GTKGLEXT gtkglext-1.0)
     if(HAVE_GTKGLEXT)
-      ocv_add_external_target(gtkglext "${GTKGLEXT_INCLUDE_DIRS}" "${GTKGLEXT_LIBRARIES}" "HAVE_GTKGLEXT")
+      # HACK for https://github.com/opencv/opencv/issues/20850
+      # pkg-config reports some include directories that do not exist. Just filter them out.
+      set(GTKGLEXT_INCLUDE_DIRS_EXISTS "")
+      foreach(p ${GTKGLEXT_INCLUDE_DIRS})
+        if (EXISTS "${p}")
+          list(APPEND GTKGLEXT_INCLUDE_DIRS_EXISTS "${p}")
+        endif()
+      endforeach()
+      ocv_add_external_target(gtkglext "${GTKGLEXT_INCLUDE_DIRS_EXISTS}" "${GTKGLEXT_LIBRARIES}" "HAVE_GTKGLEXT")
     endif()
   endif()
 elseif(HAVE_GTK)

From 5510718381f8a35f2beb6c488b2ae1d8501cf0fc Mon Sep 17 00:00:00 2001
From: Yuantao Feng <yuantao.feng@opencv.org.cn>
Date: Thu, 4 Jul 2024 13:33:20 +0800
Subject: [PATCH 14/39] Merge pull request #25810 from
 fengyuentau:python/fix_parsing_3d_mat_in_dnn

python: attempts to fix 3d mat parsing problem for dnn #25810

Fixes https://github.com/opencv/opencv/issues/25762 https://github.com/opencv/opencv/issues/23242
Relates https://github.com/opencv/opencv/issues/25763 https://github.com/opencv/opencv/issues/19091

Although `cv.Mat` has already been introduced to workaround this problem, people do not know it and it kind of leads to confusion with `numpy.array`. This patch adds a "switch" to turn off the auto multichannel feature when the API is from cv::dnn::Net (more specifically, `setInput`) and the parameter is of type `Mat`. This patch only leads to changes of three places in `pyopencv_generated_types_content.h`:

```.diff
static PyObject* pyopencv_cv_dnn_dnn_Net_setInput(PyObject* self, PyObject* py_args, PyObject* kw)
{
...
- pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) &&
+ pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) &&
...
}

// I guess we also need to change this as one-channel blob is expected for param
static PyObject* pyopencv_cv_dnn_dnn_Net_setParam(PyObject* self, PyObject* py_args, PyObject* kw)
{
...
- pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) )
+ pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) )
...
- pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) )
+ pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) )
...
}
```

Others are unchanged, e.g. `dnn_SegmentationModel` and stuff like that.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/core/include/opencv2/core/cvdef.h |  1 +
 modules/dnn/include/opencv2/dnn/dnn.hpp   | 10 ++--
 modules/dnn/misc/python/test/test_dnn.py  | 61 ++++++++++++++++++++---
 modules/python/src2/cv2.hpp               |  5 +-
 modules/python/src2/cv2_convert.cpp       |  2 +-
 modules/python/src2/cv2_convert.hpp       | 34 +++++++++----
 modules/python/src2/gen2.py               |  5 ++
 modules/python/src2/hdr_parser.py         |  4 ++
 8 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 748ecb9ece..ff1a3d7a5f 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -478,6 +478,7 @@ Cv64suf;
 #define CV_OUT
 #define CV_PROP
 #define CV_PROP_RW
+#define CV_ND // Indicates that input data should be parsed into Mat without channels
 #define CV_WRAP
 #define CV_WRAP_AS(synonym)
 #define CV_WRAP_MAPPABLE(mappable)
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index b516f80bde..0077ae4853 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -642,13 +642,13 @@ CV__DNN_INLINE_NS_BEGIN
          *  @param outputName name for layer which output is needed to get
          *  @details If @p outputName is empty, runs forward pass for the whole network.
          */
-        CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());
+        CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs, const String& outputName = String());
 
         /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
          *  @param outputBlobs contains blobs for first outputs of specified layers.
          *  @param outBlobNames names for layers which outputs are needed to get
          */
-        CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
+        CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs,
                              const std::vector<String>& outBlobNames);
 
         /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
@@ -727,7 +727,7 @@ CV__DNN_INLINE_NS_BEGIN
          *  as:
          * \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
          */
-        CV_WRAP void setInput(InputArray blob, const String& name = "",
+        CV_WRAP void setInput(CV_ND InputArray blob, const String& name = "",
                               double scalefactor = 1.0, const Scalar& mean = Scalar());
 
         /** @brief Sets the new value for the learned param of the layer.
@@ -738,8 +738,8 @@ CV__DNN_INLINE_NS_BEGIN
          *  @note If shape of the new blob differs from the previous shape,
          *  then the following forward pass may fail.
         */
-        CV_WRAP void setParam(int layer, int numParam, const Mat &blob);
-        CV_WRAP inline void setParam(const String& layerName, int numParam, const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); }
+        CV_WRAP void setParam(int layer, int numParam, CV_ND const Mat &blob);
+        CV_WRAP inline void setParam(const String& layerName, int numParam, CV_ND const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); }
 
         /** @brief Returns parameter blob of the layer.
          *  @param layer name or id of the layer.
diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py
index e3cc376dd2..8d7eed52af 100644
--- a/modules/dnn/misc/python/test/test_dnn.py
+++ b/modules/dnn/misc/python/test/test_dnn.py
@@ -455,10 +455,6 @@ class dnn_test(NewOpenCVTests):
                                     "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
 
         input = np.load(input_file)
-        # we have to expand the shape of input tensor because Python bindings cut 3D tensors to 2D
-        # it should be fixed in future. see : https://github.com/opencv/opencv/issues/19091
-        # please remove `expand_dims` after that
-        input = np.expand_dims(input, axis=3)
         gold_output = np.load(output_file)
 
         for backend, target in self.dnnBackendsAndTargets:
@@ -469,10 +465,63 @@ class dnn_test(NewOpenCVTests):
             net.setPreferableBackend(backend)
             net.setPreferableTarget(target)
 
+            # Check whether 3d shape is parsed correctly for setInput
             net.setInput(input)
-            real_output = net.forward()
 
-            normAssert(self, real_output, gold_output, "", getDefaultThreshold(target))
+            # Case 0: test API `forward(const String& outputName = String()`
+            real_output = net.forward() # Retval is a np.array of shape [2, 5, 3]
+            normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target))
+
+            '''
+            Pre-allocate output memory with correct shape.
+            Normally Python users do not use in this way,
+            but we have to test it since we design API in this way
+            '''
+            # Case 1: a np.array with a string of output name.
+            #         It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()`
+            #         when outputBlobs is a np.array and we expect it to be the only output.
+            real_output = np.empty([2, 5, 3], dtype=np.float32)
+            real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3]
+            normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target))
+
+            # Case 2: a tuple of np.array with a string of output name.
+            #         It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()`
+            #         when outputBlobs is a container of several np.array and we expect to save all outputs accordingly.
+            real_output = tuple(np.empty([2, 5, 3], dtype=np.float32))
+            real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3]
+            normAssert(self, real_output, gold_output, "Case 2", getDefaultThreshold(target))
+
+            # Case 3: a tuple of np.array with a string of output name.
+            #         It tests API `forward(OutputArrayOfArrays outputBlobs, const std::vector<String>& outBlobNames)`
+            real_output = tuple(np.empty([2, 5, 3], dtype=np.float32))
+            # Note that it does not support parsing a list , e.g. ["237"]
+            real_output = net.forward(real_output, ("237")) # Retval is a tuple with a np.array of shape [2, 5, 3]
+            normAssert(self, real_output, gold_output, "Case 3", getDefaultThreshold(target))
+
+    def test_set_param_3d(self):
+        model_path = self.find_dnn_file('dnn/onnx/models/matmul_3d_init.onnx')
+        input_file = self.find_dnn_file('dnn/onnx/data/input_matmul_3d_init.npy')
+        output_file = self.find_dnn_file('dnn/onnx/data/output_matmul_3d_init.npy')
+
+        input = np.load(input_file)
+        output = np.load(output_file)
+
+        for backend, target in self.dnnBackendsAndTargets:
+            printParams(backend, target)
+
+            net = cv.dnn.readNet(model_path)
+
+            node_name = net.getLayerNames()[0]
+            w = net.getParam(node_name, 0) # returns the original tensor of three-dimensional shape
+            net.setParam(node_name, 0, w)  # set param once again to see whether tensor is converted with correct shape
+
+            net.setPreferableBackend(backend)
+            net.setPreferableTarget(target)
+
+            net.setInput(input)
+            res_output = net.forward()
+
+            normAssert(self, output, res_output, "", getDefaultThreshold(target))
 
     def test_scalefactor_assign(self):
         params = cv.dnn.Image2BlobParams()
diff --git a/modules/python/src2/cv2.hpp b/modules/python/src2/cv2.hpp
index 06080f1aa1..8cd0c2e4b1 100644
--- a/modules/python/src2/cv2.hpp
+++ b/modules/python/src2/cv2.hpp
@@ -46,19 +46,22 @@ private:
     static const uint32_t arg_outputarg_flag     = 0x1;
     static const uint32_t arg_arithm_op_src_flag = 0x2;
     static const uint32_t arg_pathlike_flag      = 0x4;
+    static const uint32_t arg_nd_mat_flag        = 0x8;
 
 public:
     const char* name;
     bool outputarg;
     bool arithm_op_src;
     bool pathlike;
+    bool nd_mat;
     // more fields may be added if necessary
 
     ArgInfo(const char* name_, uint32_t arg_) :
         name(name_),
         outputarg((arg_ & arg_outputarg_flag) != 0),
         arithm_op_src((arg_ & arg_arithm_op_src_flag) != 0),
-        pathlike((arg_ & arg_pathlike_flag) != 0) {}
+        pathlike((arg_ & arg_pathlike_flag) != 0),
+        nd_mat((arg_ & arg_nd_mat_flag) != 0) {}
 
 private:
     ArgInfo(const ArgInfo&) = delete;
diff --git a/modules/python/src2/cv2_convert.cpp b/modules/python/src2/cv2_convert.cpp
index 35766b47c9..0626e42e53 100644
--- a/modules/python/src2/cv2_convert.cpp
+++ b/modules/python/src2/cv2_convert.cpp
@@ -173,7 +173,7 @@ bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo& info)
 
     CV_LOG_DEBUG(NULL, "Incoming ndarray '" << info.name << "': ndims=" << ndims << "  _sizes=" << pycv_dumpArray(_sizes, ndims) << "  _strides=" << pycv_dumpArray(_strides, ndims));
 
-    bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;
+    bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX && !info.nd_mat;
     if (pyopencv_Mat_TypePtr && PyObject_TypeCheck(o, pyopencv_Mat_TypePtr))
     {
         bool wrapChannels = false;
diff --git a/modules/python/src2/cv2_convert.hpp b/modules/python/src2/cv2_convert.hpp
index 0c0fbd7b96..979425c3f9 100644
--- a/modules/python/src2/cv2_convert.hpp
+++ b/modules/python/src2/cv2_convert.hpp
@@ -340,21 +340,37 @@ static bool pyopencv_to_generic_vec(PyObject* obj, std::vector<Tp>& value, const
     {
         return true;
     }
-    if (!PySequence_Check(obj))
+    if (info.nd_mat && PyArray_Check(obj))
     {
-        failmsg("Can't parse '%s'. Input argument doesn't provide sequence protocol", info.name);
-        return false;
+        /*
+            If obj is marked as nd mat and of array type, it is parsed to a single
+            mat in the target vector to avoid being split into multiple mats
+        */
+        value.resize(1);
+        if (!pyopencv_to(obj, value.front(), info))
+        {
+            failmsg("Can't parse '%s'. Array item has a wrong type", info.name);
+            return false;
+        }
     }
-    const size_t n = static_cast<size_t>(PySequence_Size(obj));
-    value.resize(n);
-    for (size_t i = 0; i < n; i++)
+    else // parse as sequence
     {
-        SafeSeqItem item_wrap(obj, i);
-        if (!pyopencv_to(item_wrap.item, value[i], info))
+        if (!PySequence_Check(obj))
         {
-            failmsg("Can't parse '%s'. Sequence item with index %lu has a wrong type", info.name, i);
+            failmsg("Can't parse '%s'. Input argument doesn't provide sequence protocol", info.name);
             return false;
         }
+        const size_t n = static_cast<size_t>(PySequence_Size(obj));
+        value.resize(n);
+        for (size_t i = 0; i < n; i++)
+        {
+            SafeSeqItem item_wrap(obj, i);
+            if (!pyopencv_to(item_wrap.item, value[i], info))
+            {
+                failmsg("Can't parse '%s'. Sequence item with index %lu has a wrong type", info.name, i);
+                return false;
+            }
+        }
     }
     return true;
 }
diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py
index 29a91958ee..3249c57f82 100755
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@@ -488,6 +488,10 @@ class ArgInfo(object):
             return self.name + '_'
         return self.name
 
+    @property
+    def nd_mat(self):
+        return '/ND' in self._modifiers
+
     @property
     def inputarg(self):
         return '/O' not in self._modifiers
@@ -528,6 +532,7 @@ class ArgInfo(object):
         arg  = 0x01 if self.outputarg else 0x0
         arg += 0x02 if self.arithm_op_src_arg else 0x0
         arg += 0x04 if self.pathlike else 0x0
+        arg += 0x08 if self.nd_mat else 0x0
         return "ArgInfo(\"%s\", %d)" % (self.name, arg)
 
 
diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py
index fa2d0077d9..27f0fe0963 100755
--- a/modules/python/src2/hdr_parser.py
+++ b/modules/python/src2/hdr_parser.py
@@ -82,6 +82,10 @@ class CppHeaderParser(object):
         modlist = []
 
         # pass 0: extracts the modifiers
+        if "CV_ND" in arg_str:
+            modlist.append("/ND")
+            arg_str = arg_str.replace("CV_ND", "")
+
         if "CV_OUT" in arg_str:
             modlist.append("/O")
             arg_str = arg_str.replace("CV_OUT", "")

From 20e72b0b30b671d577886134fb8c987548f479d9 Mon Sep 17 00:00:00 2001
From: alexlyulkov <alex.lyulkov@gmail.com>
Date: Thu, 4 Jul 2024 13:26:37 +0300
Subject: [PATCH 15/39] Merge pull request #25856 from
 alexlyulkov:al/android-optional-kotlin

Fixed kotlin requirement in Android build.gradle #25856

Now OpenCV Android SDK doesn't always require kotlin plugin. Kotlin code is compiled only if the application uses kotlin plugin.

Fixes #24663

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/java/android_sdk/build.gradle.in | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/java/android_sdk/build.gradle.in b/modules/java/android_sdk/build.gradle.in
index d3e37d0362..85e5c049fb 100644
--- a/modules/java/android_sdk/build.gradle.in
+++ b/modules/java/android_sdk/build.gradle.in
@@ -90,7 +90,12 @@
 
 apply plugin: 'com.android.library'
 apply plugin: 'maven-publish'
-@KOTLIN_PLUGIN_DECLARATION@
+try {
+    @KOTLIN_PLUGIN_DECLARATION@
+    println "Configure OpenCV with Kotlin"
+} catch (Exception e) {
+    println "Configure OpenCV without Kotlin"
+}
 
 def openCVersionName = "@OPENCV_VERSION@"
 def openCVersionCode = ((@OPENCV_VERSION_MAJOR@ * 100 + @OPENCV_VERSION_MINOR@) * 100 + @OPENCV_VERSION_PATCH@) * 10 + 0

From efbc9f0b66fc0689a0b1fa9f6646dde8a8cc5d1a Mon Sep 17 00:00:00 2001
From: Abduragim Shtanchaev <44877829+Abdurrahheem@users.noreply.github.com>
Date: Thu, 4 Jul 2024 16:25:31 +0300
Subject: [PATCH 16/39] Merge pull request #25861 from
 Abdurrahheem:ash/torch-attention-export-fix-4x

Merge pull request #25861 from Abdurrahheem:ash/torch-attention-export-fix-4x

Support for Unflatten operation requred by Attention layer - 4.x #25861

### Pull Request Readiness Checklist

All test data and models for PR are located [#1190](https://github.com/opencv/opencv_extra/pull/1190)

This PR fixes issue reised when importing batched  vanilla `Attention` layer from `PyTorch` via ONNX. Currently batched version of `Attention` layer in PyTorch [has unflatten operation inside](https://github.com/pytorch/pytorch/blob/e3b3431c4203e9eeead48f96d4afd462f0b81de5/torch/nn/functional.py#L5500C17-L5500C31). `unflatten` operation causes issue in `reshape` layer (see the Reshape_2 in the graph below) due to incorrect output of `slice` layer. This PR particularly fixes `slice` and `concat` layers to handle `unflatten` operation.


<img width="673" alt="image" src="https://github.com/opencv/opencv/assets/44877829/5b612b31-657a-47f1-83a4-0ac35a950abd">


See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/src/layers/concat_layer.cpp |  2 ++
 modules/dnn/src/layers/slice_layer.cpp  | 14 +++++++++-----
 modules/dnn/test/test_onnx_importer.cpp |  7 +++++++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
index 3a6466bd80..6cb083e453 100644
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -302,6 +302,8 @@ public:
             ranges[cAxis].start = 0;
             for (size_t i = 0; i < inputs.size(); i++)
             {
+                if (inputs[i].empty())
+                    continue;
                 ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
                 for (int j = 0; j < outMat.dims; ++j)
                 {
diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp
index de302ec291..195ed7cb24 100644
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@@ -69,10 +69,12 @@ Range normalizeRange(const Range& input_range, int n)
 {
     Range range = input_range;
 
-    range.start = std::min(std::max(range.start, -n), n - 1);
-    if (range.start < 0)
-    {
-        range.start += n;
+    if (range.start != n){
+        range.start = std::min(std::max(range.start, -n), n - 1);
+        if (range.start < 0)
+        {
+            range.start += n;
+        }
     }
 
     range.end = std::min(std::max(range.end, -n), n);
@@ -610,7 +612,9 @@ public:
         {
             for (size_t i = 0; i < outputs.size(); i++)
             {
-                inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
+                if (finalSliceRanges[i][0].start != finalSliceRanges[i][0].end){
+                    inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
+                }
             }
         }
         else
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 8855eb6439..e560ff2dbe 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -3110,6 +3110,13 @@ TEST_P(Test_ONNX_layers, Attention) {
 TEST_P(Test_ONNX_layers, AttentionSingleHead) {
     testONNXModels("attention_single_head");
 }
+TEST_P(Test_ONNX_layers, PyTorchAttentionSingleHead){
+    testONNXModels("pytorch_attention_single_head");
+}
+
+TEST_P(Test_ONNX_layers, PyTorchUnflatten){
+    testONNXModels("unflatten");
+}
 
 TEST_P(Test_ONNX_nets, ViT_B_32) {
     applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_LONG);

From 94b7a2d32025c215d762810fc66b574b5f3545e3 Mon Sep 17 00:00:00 2001
From: Maksim Shabunin <maksim.shabunin@gmail.com>
Date: Thu, 4 Jul 2024 16:29:08 +0300
Subject: [PATCH 17/39] Merge pull request #25842 from
 mshabunin:cpp-imgproc-test-4.x

imgproc: remove C-API usage from tests #25842

Final cleanup will be done in 5.x after regular merge.

Some tests have been reworked, some required only slight modifications.
---
 modules/imgproc/test/test_canny.cpp      |  221 +-
 modules/imgproc/test/test_color.cpp      |   30 +-
 modules/imgproc/test/test_convhull.cpp   | 2591 +++++-----------------
 modules/imgproc/test/test_filter.cpp     |   18 +-
 modules/imgproc/test/test_imgwarp.cpp    |   17 +-
 modules/imgproc/test/test_pc.cpp         |    4 +-
 modules/imgproc/test/test_templmatch.cpp |  425 ++--
 7 files changed, 851 insertions(+), 2455 deletions(-)

diff --git a/modules/imgproc/test/test_canny.cpp b/modules/imgproc/test/test_canny.cpp
index 7e4fffbf30..9758700ab5 100644
--- a/modules/imgproc/test/test_canny.cpp
+++ b/modules/imgproc/test/test_canny.cpp
@@ -43,130 +43,7 @@
 
 namespace opencv_test { namespace {
 
-class CV_CannyTest : public cvtest::ArrayTest
-{
-public:
-    CV_CannyTest(bool custom_deriv = false);
-
-protected:
-    void get_test_array_types_and_sizes( int test_case_idx, vector<vector<Size> >& sizes, vector<vector<int> >& types );
-    double get_success_error_level( int test_case_idx, int i, int j );
-    int prepare_test_case( int test_case_idx );
-    void run_func();
-    void prepare_to_validation( int );
-    int validate_test_results( int /*test_case_idx*/ );
-
-    int aperture_size;
-    bool use_true_gradient;
-    double threshold1, threshold2;
-    bool test_cpp;
-    bool test_custom_deriv;
-
-    Mat img;
-};
-
-
-CV_CannyTest::CV_CannyTest(bool custom_deriv)
-{
-    test_array[INPUT].push_back(NULL);
-    test_array[OUTPUT].push_back(NULL);
-    test_array[REF_OUTPUT].push_back(NULL);
-    element_wise_relative_error = true;
-    aperture_size = 0;
-    use_true_gradient = false;
-    threshold1 = threshold2 = 0;
-    test_custom_deriv = custom_deriv;
-
-    const char imgPath[] = "shared/fruits.png";
-    img = cv::imread(cvtest::TS::ptr()->get_data_path() + imgPath, IMREAD_GRAYSCALE);
-}
-
-
-void CV_CannyTest::get_test_array_types_and_sizes( int test_case_idx,
-                                                  vector<vector<Size> >& sizes,
-                                                  vector<vector<int> >& types )
-{
-    RNG& rng = ts->get_rng();
-    double thresh_range;
-
-    cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
-    types[INPUT][0] = types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_8U;
-
-    aperture_size = cvtest::randInt(rng) % 2 ? 5 : 3;
-    thresh_range = aperture_size == 3 ? 300 : 1000;
-
-    threshold1 = cvtest::randReal(rng)*thresh_range;
-    threshold2 = cvtest::randReal(rng)*thresh_range*0.3;
-
-    if( cvtest::randInt(rng) % 2 )
-        CV_SWAP( threshold1, threshold2, thresh_range );
-
-    use_true_gradient = cvtest::randInt(rng) % 2 != 0;
-    test_cpp = (cvtest::randInt(rng) & 256) == 0;
-
-    ts->printf(cvtest::TS::LOG, "Canny(size = %d x %d, aperture_size = %d, threshold1 = %g, threshold2 = %g, L2 = %s) test_cpp = %s (test case #%d)\n",
-        sizes[0][0].width, sizes[0][0].height, aperture_size, threshold1, threshold2, use_true_gradient ? "TRUE" : "FALSE", test_cpp ? "TRUE" : "FALSE", test_case_idx);
-}
-
-
-int CV_CannyTest::prepare_test_case( int test_case_idx )
-{
-    int code = cvtest::ArrayTest::prepare_test_case( test_case_idx );
-    if( code > 0 )
-    {
-        RNG& rng = ts->get_rng();
-        Mat& src = test_mat[INPUT][0];
-        //GaussianBlur(src, src, Size(11, 11), 5, 5);
-        if(src.cols > img.cols || src.rows > img.rows)
-            resize(img, src, src.size(), 0, 0, INTER_LINEAR_EXACT);
-        else
-            img(
-                Rect(
-                    cvtest::randInt(rng) % (img.cols-src.cols),
-                    cvtest::randInt(rng) % (img.rows-src.rows),
-                    src.cols,
-                    src.rows
-                )
-            ).copyTo(src);
-        GaussianBlur(src, src, Size(5, 5), 0);
-    }
-
-    return code;
-}
-
-
-double CV_CannyTest::get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ )
-{
-    return 0;
-}
-
-
-void CV_CannyTest::run_func()
-{
-    if (test_custom_deriv)
-    {
-        cv::Mat _out = cv::cvarrToMat(test_array[OUTPUT][0]);
-        cv::Mat src = cv::cvarrToMat(test_array[INPUT][0]);
-        cv::Mat dx, dy;
-        int m = aperture_size;
-        Point anchor(m/2, m/2);
-        Mat dxkernel = cvtest::calcSobelKernel2D( 1, 0, m, 0 );
-        Mat dykernel = cvtest::calcSobelKernel2D( 0, 1, m, 0 );
-        cvtest::filter2D(src, dx, CV_16S, dxkernel, anchor, 0, BORDER_REPLICATE);
-        cvtest::filter2D(src, dy, CV_16S, dykernel, anchor, 0, BORDER_REPLICATE);
-        cv::Canny(dx, dy, _out, threshold1, threshold2, use_true_gradient);
-    }
-    else
-    {
-        cv::Mat _out = cv::cvarrToMat(test_array[OUTPUT][0]);
-        cv::Canny(cv::cvarrToMat(test_array[INPUT][0]), _out, threshold1, threshold2,
-                aperture_size, use_true_gradient);
-    }
-}
-
-
-static void
-cannyFollow( int x, int y, float lowThreshold, const Mat& mag, Mat& dst )
+static void Canny_reference_follow( int x, int y, float lowThreshold, const Mat& mag, Mat& dst )
 {
     static const int ofs[][2] = {{1,0},{1,-1},{0,-1},{-1,-1},{-1,0},{-1,1},{0,1},{1,1}};
     int i;
@@ -181,16 +58,15 @@ cannyFollow( int x, int y, float lowThreshold, const Mat& mag, Mat& dst )
             (unsigned)y1 < (unsigned)mag.rows &&
             mag.at<float>(y1, x1) > lowThreshold &&
             !dst.at<uchar>(y1, x1) )
-            cannyFollow( x1, y1, lowThreshold, mag, dst );
+            Canny_reference_follow( x1, y1, lowThreshold, mag, dst );
     }
 }
 
-
-static void
-test_Canny( const Mat& src, Mat& dst,
+static void Canny_reference( const Mat& src, Mat& dst,
             double threshold1, double threshold2,
             int aperture_size, bool use_true_gradient )
 {
+    dst.create(src.size(), src.type());
     int m = aperture_size;
     Point anchor(m/2, m/2);
     const double tan_pi_8 = tan(CV_PI/8.);
@@ -273,47 +149,80 @@ test_Canny( const Mat& src, Mat& dst,
     {
         for( x = 0; x < width; x++ )
             if( mag.at<float>(y, x) > highThreshold && !dst.at<uchar>(y, x) )
-                cannyFollow( x, y, lowThreshold, mag, dst );
+                Canny_reference_follow( x, y, lowThreshold, mag, dst );
     }
 }
 
+//==============================================================================
 
-void CV_CannyTest::prepare_to_validation( int )
+// aperture, true gradient
+typedef testing::TestWithParam<testing::tuple<int, bool>> Canny_Modes;
+
+TEST_P(Canny_Modes, accuracy)
 {
-    Mat src = test_mat[INPUT][0], dst = test_mat[REF_OUTPUT][0];
-    test_Canny( src, dst, threshold1, threshold2, aperture_size, use_true_gradient );
-}
+    const int aperture = get<0>(GetParam());
+    const bool trueGradient = get<1>(GetParam());
+    const double range = aperture == 3 ? 300. : 1000.;
+    RNG & rng = TS::ptr()->get_rng();
 
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
 
-int CV_CannyTest::validate_test_results( int test_case_idx )
-{
-    int code = cvtest::TS::OK, nz0;
-    prepare_to_validation(test_case_idx);
+        const std::string fname = cvtest::findDataFile("shared/fruits.png");
+        const Mat original = cv::imread(fname, IMREAD_GRAYSCALE);
 
-    double err = cvtest::norm(test_mat[OUTPUT][0], test_mat[REF_OUTPUT][0], CV_L1);
-    if( err == 0 )
-        return code;
+        const double thresh1 = rng.uniform(0., range);
+        const double thresh2 = rng.uniform(0., range * 0.3);
+        const Size sz(rng.uniform(127, 800), rng.uniform(127, 600));
+        const Size osz = original.size();
 
-    if( err != cvRound(err) || cvRound(err)%255 != 0 )
-    {
-        ts->printf( cvtest::TS::LOG, "Some of the pixels, produced by Canny, are not 0's or 255's; the difference is %g\n", err );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        return code;
-    }
+        // preparation
+        Mat img;
+        if (sz.width >= osz.width || sz.height >= osz.height)
+        {
+            // larger image -> scale
+            resize(original, img, sz, 0, 0, INTER_LINEAR_EXACT);
+        }
+        else
+        {
+            // smaller image -> crop
+            Point origin(rng.uniform(0, osz.width - sz.width), rng.uniform(0, osz.height - sz.height));
+            Rect roi(origin, sz);
+            original(roi).copyTo(img);
+        }
+        GaussianBlur(img, img, Size(5, 5), 0);
 
-    nz0 = cvRound(cvtest::norm(test_mat[REF_OUTPUT][0], CV_L1)/255);
-    err = (err/255/MAX(nz0,100))*100;
-    if( err > 1 )
-    {
-        ts->printf( cvtest::TS::LOG, "Too high percentage of non-matching edge pixels = %g%%\n", err);
-        ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-    }
+        // regular function
+        Mat result;
+        {
+            cv::Canny(img, result, thresh1, thresh2, aperture, trueGradient);
+        }
 
-    return code;
+        // custom derivatives
+        Mat customResult;
+        {
+            Mat dxkernel = cvtest::calcSobelKernel2D(1, 0, aperture, 0);
+            Mat dykernel = cvtest::calcSobelKernel2D(0, 1, aperture, 0);
+            Point anchor(aperture / 2, aperture / 2);
+            cv::Mat dx, dy;
+            cvtest::filter2D(img, dx, CV_16S, dxkernel, anchor, 0, BORDER_REPLICATE);
+            cvtest::filter2D(img, dy, CV_16S, dykernel, anchor, 0, BORDER_REPLICATE);
+            cv::Canny(dx, dy, customResult, thresh1, thresh2, trueGradient);
+        }
+
+        Mat reference;
+        Canny_reference(img, reference, thresh1, thresh2, aperture, trueGradient);
+
+        EXPECT_MAT_NEAR(result, reference, 0);
+        EXPECT_MAT_NEAR(customResult, reference, 0);
+    }
 }
 
-TEST(Imgproc_Canny, accuracy) { CV_CannyTest test; test.safe_run(); }
-TEST(Imgproc_Canny, accuracy_deriv) { CV_CannyTest test(true); test.safe_run(); }
+INSTANTIATE_TEST_CASE_P(/**/, Canny_Modes,
+    testing::Combine(
+        testing::Values(3, 5),
+        testing::Values(true, false)));
 
 
 /*
diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp
index 5a5898415c..60862b2805 100644
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@@ -76,7 +76,6 @@ protected:
     bool inplace;
     bool custom_inv_transform;
     int fwd_code, inv_code;
-    bool test_cpp;
     int hue_range;
     bool srgb;
 };
@@ -97,7 +96,6 @@ CV_ColorCvtBaseTest::CV_ColorCvtBaseTest( bool _custom_inv_transform, bool _allo
 
     fwd_code_str = inv_code_str = 0;
 
-    test_cpp = false;
     hue_range = 0;
     blue_idx = 0;
     srgb = false;
@@ -147,7 +145,6 @@ void CV_ColorCvtBaseTest::get_test_array_types_and_sizes( int test_case_idx,
         types[OUTPUT][1] = types[REF_OUTPUT][1] = CV_MAKETYPE(depth, cn);
 
     inplace = cn == 3 && cvtest::randInt(rng) % 2 != 0;
-    test_cpp = (cvtest::randInt(rng) & 256) == 0;
 }
 
 
@@ -161,23 +158,17 @@ int CV_ColorCvtBaseTest::prepare_test_case( int test_case_idx )
 
 void CV_ColorCvtBaseTest::run_func()
 {
-    CvArr* out0 = test_array[OUTPUT][0];
-    cv::Mat _out0 = cv::cvarrToMat(out0), _out1 = cv::cvarrToMat(test_array[OUTPUT][1]);
+    cv::Mat out0 = test_mat[OUTPUT][0];
+    cv::Mat _out0 = out0, _out1 = test_mat[OUTPUT][1];
 
-    if(!test_cpp)
-        cvCvtColor( inplace ? out0 : test_array[INPUT][0], out0, fwd_code );
-    else
-        cv::cvtColor( cv::cvarrToMat(inplace ? out0 : test_array[INPUT][0]), _out0, fwd_code, _out0.channels());
+    cv::cvtColor( inplace ? out0 : test_mat[INPUT][0], _out0, fwd_code, _out0.channels());
 
     if( inplace )
     {
-        cvCopy( out0, test_array[OUTPUT][1] );
-        out0 = test_array[OUTPUT][1];
+        out0.copyTo(test_mat[OUTPUT][1]);
+        out0 = test_mat[OUTPUT][1];
     }
-    if(!test_cpp)
-        cvCvtColor( out0, test_array[OUTPUT][1], inv_code );
-    else
-        cv::cvtColor(cv::cvarrToMat(out0), _out1, inv_code, _out1.channels());
+    cv::cvtColor(out0, _out1, inv_code, _out1.channels());
 }
 
 
@@ -1730,13 +1721,8 @@ double CV_ColorBayerTest::get_success_error_level( int /*test_case_idx*/, int /*
 
 void CV_ColorBayerTest::run_func()
 {
-    if(!test_cpp)
-        cvCvtColor( test_array[INPUT][0], test_array[OUTPUT][0], fwd_code );
-    else
-    {
-        cv::Mat _out = cv::cvarrToMat(test_array[OUTPUT][0]);
-        cv::cvtColor(cv::cvarrToMat(test_array[INPUT][0]), _out, fwd_code, _out.channels());
-    }
+    cv::Mat _out = test_mat[OUTPUT][0];
+    cv::cvtColor(test_mat[INPUT][0], _out, fwd_code, _out.channels());
 }
 
 
diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp
index de45bf2c72..7e10757514 100644
--- a/modules/imgproc/test/test_convhull.cpp
+++ b/modules/imgproc/test/test_convhull.cpp
@@ -39,2026 +39,93 @@
 //
 //M*/
 
+#include "opencv2/core/hal/interface.h"
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/cuda_test.hpp"
 #include "test_precomp.hpp"
 
 namespace opencv_test { namespace {
 
-/*static int
-cvTsPointConvexPolygon( CvPoint2D32f pt, CvPoint2D32f* v, int n )
-{
-    CvPoint2D32f v0 = v[n-1];
-    int i, sign = 0;
-
-    for( i = 0; i < n; i++ )
-    {
-        CvPoint2D32f v1 = v[i];
-        float dx = pt.x - v0.x, dy = pt.y - v0.y;
-        float dx1 = v1.x - v0.x, dy1 = v1.y - v0.y;
-        double t = (double)dx*dy1 - (double)dx1*dy;
-        if( fabs(t) > DBL_EPSILON )
-        {
-            if( t*sign < 0 )
-                break;
-            if( sign == 0 )
-                sign = t < 0 ? -1 : 1;
-        }
-        else if( fabs(dx) + fabs(dy) < DBL_EPSILON )
-            return i+1;
-        v0 = v1;
-    }
-
-    return i < n ? -1 : 0;
-}*/
-
-CV_INLINE double
-cvTsDist( CvPoint2D32f a, CvPoint2D32f b )
-{
-    double dx = a.x - b.x;
-    double dy = a.y - b.y;
-    return sqrt(dx*dx + dy*dy);
-}
-CV_INLINE double
-cvTsDist( const Point2f& a, const Point2f& b )
-{
-    double dx = a.x - b.x;
-    double dy = a.y - b.y;
-    return sqrt(dx*dx + dy*dy);
-}
-
-CV_INLINE double
-cvTsPtLineDist( CvPoint2D32f pt, CvPoint2D32f a, CvPoint2D32f b )
-{
-    double d0 = cvTsDist( pt, a ), d1;
-    double dd = cvTsDist( a, b );
-    if( dd < FLT_EPSILON )
-        return d0;
-    d1 = cvTsDist( pt, b );
-    dd = fabs((double)(pt.x - a.x)*(b.y - a.y) - (double)(pt.y - a.y)*(b.x - a.x))/dd;
-    d0 = MIN( d0, d1 );
-    return MIN( d0, dd );
-}
-
-static double
-cvTsPointPolygonTest( CvPoint2D32f pt, const CvPoint2D32f* vv, int n, int* _idx=0, int* _on_edge=0 )
-{
-    int i;
-    Point2f v = vv[n-1], v0;
-    double min_dist_num = FLT_MAX, min_dist_denom = 1;
-    int min_dist_idx = -1, min_on_edge = 0;
-    int counter = 0;
-    double result;
-
-    for( i = 0; i < n; i++ )
-    {
-        double dx, dy, dx1, dy1, dx2, dy2, dist_num, dist_denom = 1;
-        int on_edge = 0, idx = i;
-
-        v0 = v; v = vv[i];
-        dx = v.x - v0.x; dy = v.y - v0.y;
-        dx1 = pt.x - v0.x; dy1 = pt.y - v0.y;
-        dx2 = pt.x - v.x; dy2 = pt.y - v.y;
-
-        if( dx2*dx + dy2*dy >= 0 )
-            dist_num = dx2*dx2 + dy2*dy2;
-        else if( dx1*dx + dy1*dy <= 0 )
-        {
-            dist_num = dx1*dx1 + dy1*dy1;
-            idx = i - 1;
-            if( idx < 0 ) idx = n-1;
-        }
-        else
-        {
-            dist_num = (dy1*dx - dx1*dy);
-            dist_num *= dist_num;
-            dist_denom = dx*dx + dy*dy;
-            on_edge = 1;
-        }
-
-        if( dist_num*min_dist_denom < min_dist_num*dist_denom )
-        {
-            min_dist_num = dist_num;
-            min_dist_denom = dist_denom;
-            min_dist_idx = idx;
-            min_on_edge = on_edge;
-            if( min_dist_num == 0 )
-                break;
-        }
-
-        if( (v0.y <= pt.y && v.y <= pt.y) ||
-            (v0.y > pt.y && v.y > pt.y) ||
-            (v0.x < pt.x && v.x < pt.x) )
-            continue;
-
-        dist_num = dy1*dx - dx1*dy;
-        if( dy < 0 )
-            dist_num = -dist_num;
-        counter += dist_num > 0;
-    }
-
-    result = sqrt(min_dist_num/min_dist_denom);
-    if( counter % 2 == 0 )
-        result = -result;
-
-    if( _idx )
-        *_idx = min_dist_idx;
-    if( _on_edge )
-        *_on_edge = min_on_edge;
-
-    return result;
-}
-
-static cv::Point2f
-cvTsMiddlePoint(const cv::Point2f &a, const cv::Point2f &b)
-{
-    return cv::Point2f((a.x + b.x) / 2, (a.y + b.y) / 2);
-}
-
-static bool
-cvTsIsPointOnLineSegment(const cv::Point2f &x, const cv::Point2f &a, const cv::Point2f &b)
-{
-    double d1 = cvTsDist(cvPoint2D32f(x.x, x.y), cvPoint2D32f(a.x, a.y));
-    double d2 = cvTsDist(cvPoint2D32f(x.x, x.y), cvPoint2D32f(b.x, b.y));
-    double d3 = cvTsDist(cvPoint2D32f(a.x, a.y), cvPoint2D32f(b.x, b.y));
-
-    return (abs(d1 + d2 - d3) <= (1E-4));
-}
-
-
-/****************************************************************************************\
-*                              Base class for shape descriptor tests                     *
-\****************************************************************************************/
-
-class CV_BaseShapeDescrTest : public cvtest::BaseTest
-{
-public:
-    CV_BaseShapeDescrTest();
-    virtual ~CV_BaseShapeDescrTest();
-    void clear();
-
-protected:
-    int read_params( const cv::FileStorage& fs );
-    void run_func(void);
-    int prepare_test_case( int test_case_idx );
-    int validate_test_results( int test_case_idx );
-    virtual void generate_point_set( void* points );
-    virtual void extract_points();
-
-    int min_log_size;
-    int max_log_size;
-    int dims;
-    bool enable_flt_points;
-
-    CvMemStorage* storage;
-    CvSeq* points1;
-    CvMat* points2;
-    void* points;
-    void* result;
-    double low_high_range;
-    Scalar low, high;
-
-    bool test_cpp;
-};
-
-
-CV_BaseShapeDescrTest::CV_BaseShapeDescrTest()
-{
-    points1 = 0;
-    points2 = 0;
-    points = 0;
-    storage = 0;
-    test_case_count = 500;
-    min_log_size = 0;
-    max_log_size = 10;
-    low = high = cvScalarAll(0);
-    low_high_range = 50;
-    dims = 2;
-    enable_flt_points = true;
-
-    test_cpp = false;
-}
-
-
-CV_BaseShapeDescrTest::~CV_BaseShapeDescrTest()
-{
-    clear();
-}
-
-
-void CV_BaseShapeDescrTest::clear()
-{
-    cvtest::BaseTest::clear();
-    cvReleaseMemStorage( &storage );
-    cvReleaseMat( &points2 );
-    points1 = 0;
-    points = 0;
-}
-
-
-int CV_BaseShapeDescrTest::read_params( const cv::FileStorage& fs )
-{
-    int code = cvtest::BaseTest::read_params( fs );
-    if( code < 0 )
-        return code;
-
-    read( find_param( fs, "struct_count" ), test_case_count, test_case_count );
-    read( find_param( fs, "min_log_size" ), min_log_size, min_log_size );
-    read( find_param( fs, "max_log_size" ), max_log_size, max_log_size );
-
-    min_log_size = cvtest::clipInt( min_log_size, 0, 8 );
-    max_log_size = cvtest::clipInt( max_log_size, 0, 10 );
-    if( min_log_size > max_log_size )
-    {
-        int t;
-        CV_SWAP( min_log_size, max_log_size, t );
-    }
-
-    return 0;
-}
-
-
-void CV_BaseShapeDescrTest::generate_point_set( void* pointsSet )
-{
-    RNG& rng = ts->get_rng();
-    int i, k, n, total, point_type;
-    CvSeqReader reader;
-    uchar* data = 0;
-    double a[4], b[4];
-
-    for( k = 0; k < 4; k++ )
-    {
-        a[k] = high.val[k] - low.val[k];
-        b[k] = low.val[k];
-    }
-    memset( &reader, 0, sizeof(reader) );
-
-    if( CV_IS_SEQ(pointsSet) )
-    {
-        CvSeq* ptseq = (CvSeq*)pointsSet;
-        total = ptseq->total;
-        point_type = CV_SEQ_ELTYPE(ptseq);
-        cvStartReadSeq( ptseq, &reader );
-    }
-    else
-    {
-        CvMat* ptm = (CvMat*)pointsSet;
-        CV_Assert( CV_IS_MAT(ptm) && CV_IS_MAT_CONT(ptm->type) );
-        total = ptm->rows + ptm->cols - 1;
-        point_type = CV_MAT_TYPE(ptm->type);
-        data = ptm->data.ptr;
-    }
-
-    n = CV_MAT_CN(point_type);
-    point_type = CV_MAT_DEPTH(point_type);
-
-    CV_Assert( (point_type == CV_32S || point_type == CV_32F) && n <= 4 );
-
-    for( i = 0; i < total; i++ )
-    {
-        int* pi;
-        float* pf;
-        if( reader.ptr )
-        {
-            pi = (int*)reader.ptr;
-            pf = (float*)reader.ptr;
-            CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
-        }
-        else
-        {
-            pi = (int*)data + i*n;
-            pf = (float*)data + i*n;
-        }
-        if( point_type == CV_32S )
-            for( k = 0; k < n; k++ )
-                pi[k] = cvRound(cvtest::randReal(rng)*a[k] + b[k]);
-        else
-            for( k = 0; k < n; k++ )
-                pf[k] = (float)(cvtest::randReal(rng)*a[k] + b[k]);
-    }
-}
-
-
-int CV_BaseShapeDescrTest::prepare_test_case( int test_case_idx )
-{
-    int size;
-    int use_storage = 0;
-    int point_type;
-    int i;
-    RNG& rng = ts->get_rng();
-
-    cvtest::BaseTest::prepare_test_case( test_case_idx );
-
-    clear();
-    size = cvRound( exp((cvtest::randReal(rng) * (max_log_size - min_log_size) + min_log_size)*CV_LOG2) );
-    use_storage = cvtest::randInt(rng) % 2;
-    point_type = CV_MAKETYPE(cvtest::randInt(rng) %
-        (enable_flt_points ? 2 : 1) ? CV_32F : CV_32S, dims);
-
-    if( use_storage )
-    {
-        storage = cvCreateMemStorage( (cvtest::randInt(rng)%10 + 1)*1024 );
-        points1 = cvCreateSeq( point_type, sizeof(CvSeq), CV_ELEM_SIZE(point_type), storage );
-        cvSeqPushMulti( points1, 0, size );
-        points = points1;
-    }
-    else
-    {
-        int rows = 1, cols = size;
-        if( cvtest::randInt(rng) % 2 )
-            rows = size, cols = 1;
-
-        points2 = cvCreateMat( rows, cols, point_type );
-        points = points2;
-    }
-
-    for( i = 0; i < 4; i++ )
-    {
-        low.val[i] = (cvtest::randReal(rng)-0.5)*low_high_range*2;
-        high.val[i] = (cvtest::randReal(rng)-0.5)*low_high_range*2;
-        if( low.val[i] > high.val[i] )
-        {
-            double t;
-            CV_SWAP( low.val[i], high.val[i], t );
-        }
-        if( high.val[i] < low.val[i] + 1 )
-            high.val[i] += 1;
-    }
-
-    generate_point_set( points );
-
-    test_cpp = (cvtest::randInt(rng) & 16) == 0;
-    return 1;
-}
-
-
-void CV_BaseShapeDescrTest::extract_points()
-{
-    if( points1 )
-    {
-        points2 = cvCreateMat( 1, points1->total, CV_SEQ_ELTYPE(points1) );
-        cvCvtSeqToArray( points1, points2->data.ptr );
-    }
-
-    if( CV_MAT_DEPTH(points2->type) != CV_32F && enable_flt_points )
-    {
-        CvMat tmp = cvMat( points2->rows, points2->cols,
-            (points2->type & ~CV_MAT_DEPTH_MASK) | CV_32F, points2->data.ptr );
-        cvConvert( points2, &tmp );
-    }
-}
-
-
-void CV_BaseShapeDescrTest::run_func(void)
-{
-}
-
-
-int CV_BaseShapeDescrTest::validate_test_results( int /*test_case_idx*/ )
-{
-    extract_points();
-    return 0;
-}
-
-
-/****************************************************************************************\
-*                                     Convex Hull Test                                   *
-\****************************************************************************************/
-
-class CV_ConvHullTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_ConvHullTest();
-    virtual ~CV_ConvHullTest();
-    void clear();
-
-protected:
-    void run_func(void);
-    int prepare_test_case( int test_case_idx );
-    int validate_test_results( int test_case_idx );
-
-    CvSeq* hull1;
-    CvMat* hull2;
-    void* hull_storage;
-    int orientation;
-    int return_points;
-};
-
-
-CV_ConvHullTest::CV_ConvHullTest()
-{
-    hull1 = 0;
-    hull2 = 0;
-    hull_storage = 0;
-    orientation = return_points = 0;
-}
-
-
-CV_ConvHullTest::~CV_ConvHullTest()
-{
-    clear();
-}
-
-
-void CV_ConvHullTest::clear()
-{
-    CV_BaseShapeDescrTest::clear();
-    cvReleaseMat( &hull2 );
-    hull1 = 0;
-    hull_storage = 0;
-}
-
-
-int CV_ConvHullTest::prepare_test_case( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::prepare_test_case( test_case_idx );
-    int use_storage_for_hull = 0;
-    RNG& rng = ts->get_rng();
-
-    if( code <= 0 )
-        return code;
-
-    orientation = cvtest::randInt(rng) % 2 ? CV_CLOCKWISE : CV_COUNTER_CLOCKWISE;
-    return_points = cvtest::randInt(rng) % 2;
-
-    use_storage_for_hull = (cvtest::randInt(rng) % 2) && !test_cpp;
-    if( use_storage_for_hull )
-    {
-        if( !storage )
-            storage = cvCreateMemStorage( (cvtest::randInt(rng)%10 + 1)*1024 );
-        hull_storage = storage;
-    }
-    else
-    {
-        int rows, cols;
-        int sz = points1 ? points1->total : points2->cols + points2->rows - 1;
-        int point_type = points1 ? CV_SEQ_ELTYPE(points1) : CV_MAT_TYPE(points2->type);
-
-        if( cvtest::randInt(rng) % 2 )
-            rows = sz, cols = 1;
-        else
-            rows = 1, cols = sz;
-
-        hull2 = cvCreateMat( rows, cols, return_points ? point_type : CV_32SC1 );
-        hull_storage = hull2;
-    }
-
-    return code;
-}
-
-
-void CV_ConvHullTest::run_func()
-{
-    if(!test_cpp)
-        hull1 = cvConvexHull2( points, hull_storage, orientation, return_points );
-    else
-    {
-        cv::Mat _points = cv::cvarrToMat(points);
-        bool clockwise = orientation == CV_CLOCKWISE;
-        size_t n = 0;
-        if( !return_points )
-        {
-            std::vector<int> _hull;
-            cv::convexHull(_points, _hull, clockwise);
-            n = _hull.size();
-            memcpy(hull2->data.ptr, &_hull[0], n*sizeof(_hull[0]));
-        }
-        else if(_points.type() == CV_32SC2)
-        {
-            std::vector<cv::Point> _hull;
-            cv::convexHull(_points, _hull, clockwise);
-            n = _hull.size();
-            memcpy(hull2->data.ptr, &_hull[0], n*sizeof(_hull[0]));
-        }
-        else if(_points.type() == CV_32FC2)
-        {
-            std::vector<cv::Point2f> _hull;
-            cv::convexHull(_points, _hull, clockwise);
-            n = _hull.size();
-            memcpy(hull2->data.ptr, &_hull[0], n*sizeof(_hull[0]));
-        }
-        if(hull2->rows > hull2->cols)
-            hull2->rows = (int)n;
-        else
-            hull2->cols = (int)n;
-    }
-}
-
-
-int CV_ConvHullTest::validate_test_results( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    CvMat* hull = 0;
-    CvMat* mask = 0;
-    int i, point_count, hull_count;
-    CvPoint2D32f *p, *h;
-    CvSeq header, hheader, *ptseq, *hseq;
-    CvSeqBlock block, hblock;
-
-    if( points1 )
-        ptseq = points1;
-    else
-        ptseq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(points2->type),
-            sizeof(CvSeq), CV_ELEM_SIZE(points2->type), points2->data.ptr,
-            points2->rows + points2->cols - 1, &header, &block );
-    point_count = ptseq->total;
-    p = (CvPoint2D32f*)(points2->data.ptr);
-
-    if( hull1 )
-        hseq = hull1;
-    else
-        hseq = cvMakeSeqHeaderForArray( CV_MAT_TYPE(hull2->type),
-            sizeof(CvSeq), CV_ELEM_SIZE(hull2->type), hull2->data.ptr,
-            hull2->rows + hull2->cols - 1, &hheader, &hblock );
-    hull_count = hseq->total;
-    hull = cvCreateMat( 1, hull_count, CV_32FC2 );
-    mask = cvCreateMat( 1, hull_count, CV_8UC1 );
-    cvZero( mask );
-    Mat _mask = cvarrToMat(mask);
-
-    h = (CvPoint2D32f*)(hull->data.ptr);
-
-    // extract convex hull points
-    if( return_points )
-    {
-        cvCvtSeqToArray( hseq, hull->data.ptr );
-        if( CV_SEQ_ELTYPE(hseq) != CV_32FC2 )
-        {
-            CvMat tmp = cvMat( hull->rows, hull->cols, CV_32SC2, hull->data.ptr );
-            cvConvert( &tmp, hull );
-        }
-    }
-    else
-    {
-        CvSeqReader reader;
-        cvStartReadSeq( hseq, &reader );
-
-        for( i = 0; i < hull_count; i++ )
-        {
-            schar* ptr = reader.ptr;
-            int idx;
-            CV_NEXT_SEQ_ELEM( hseq->elem_size, reader );
-
-            if( hull1 )
-                idx = cvSeqElemIdx( ptseq, *(uchar**)ptr );
-            else
-                idx = *(int*)ptr;
-
-            if( idx < 0 || idx >= point_count )
-            {
-                ts->printf( cvtest::TS::LOG, "Invalid convex hull point #%d\n", i );
-                code = cvtest::TS::FAIL_INVALID_OUTPUT;
-                goto _exit_;
-            }
-            h[i] = p[idx];
-        }
-    }
-
-    // check that the convex hull is a convex polygon
-    if( hull_count >= 3 )
-    {
-        CvPoint2D32f pt0 = h[hull_count-1];
-        for( i = 0; i < hull_count; i++ )
-        {
-            int j = i+1;
-            CvPoint2D32f pt1 = h[i], pt2 = h[j < hull_count ? j : 0];
-            float dx0 = pt1.x - pt0.x, dy0 = pt1.y - pt0.y;
-            float dx1 = pt2.x - pt1.x, dy1 = pt2.y - pt1.y;
-            double t = (double)dx0*dy1 - (double)dx1*dy0;
-            if( (t < 0) ^ (orientation != CV_COUNTER_CLOCKWISE) )
-            {
-                ts->printf( cvtest::TS::LOG, "The convex hull is not convex or has a wrong orientation (vtx %d)\n", i );
-                code = cvtest::TS::FAIL_INVALID_OUTPUT;
-                goto _exit_;
-            }
-            pt0 = pt1;
-        }
-    }
-
-    // check that all the points are inside the hull or on the hull edge
-    // and at least hull_point points are at the hull vertices
-    for( i = 0; i < point_count; i++ )
-    {
-        int idx = 0, on_edge = 0;
-        double pptresult = cvTsPointPolygonTest( p[i], h, hull_count, &idx, &on_edge );
-
-        if( pptresult < 0 )
-        {
-            ts->printf( cvtest::TS::LOG, "The point #%d is outside of the convex hull\n", i );
-            code = cvtest::TS::FAIL_BAD_ACCURACY;
-            goto _exit_;
-        }
-
-        if( pptresult < FLT_EPSILON && !on_edge )
-            mask->data.ptr[idx] = (uchar)1;
-    }
-
-    if( cvtest::norm( _mask, Mat::zeros(_mask.dims, _mask.size, _mask.type()), NORM_L1 ) != hull_count )
-    {
-        ts->printf( cvtest::TS::LOG, "Not every convex hull vertex coincides with some input point\n" );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-_exit_:
-
-    cvReleaseMat( &hull );
-    cvReleaseMat( &mask );
-    if( code < 0 )
-        ts->set_failed_test_info( code );
-    return code;
-}
-
-
-/****************************************************************************************\
-*                                     MinAreaRect Test                                   *
-\****************************************************************************************/
-
-class CV_MinAreaRectTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_MinAreaRectTest();
-
-protected:
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-
-    CvBox2D box;
-    CvPoint2D32f box_pt[4];
-};
-
-
-CV_MinAreaRectTest::CV_MinAreaRectTest()
-{
-}
-
-
-void CV_MinAreaRectTest::run_func()
-{
-    if(!test_cpp)
-    {
-        box = cvMinAreaRect2( points, storage );
-        cvBoxPoints( box, box_pt );
-    }
-    else
-    {
-        cv::RotatedRect r = cv::minAreaRect(cv::cvarrToMat(points));
-        box = cvBox2D(r);
-        r.points((cv::Point2f*)box_pt);
-    }
-}
-
-
-int CV_MinAreaRectTest::validate_test_results( int test_case_idx )
-{
-    double eps = 1e-1;
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    int i, j, point_count = points2->rows + points2->cols - 1;
-    CvPoint2D32f *p = (CvPoint2D32f*)(points2->data.ptr);
-    int mask[] = {0,0,0,0};
-
-    // check that the bounding box is a rotated rectangle:
-    //  1. diagonals should be equal
-    //  2. they must intersect in their middle points
-    {
-        double d0 = cvTsDist( box_pt[0], box_pt[2] );
-        double d1 = cvTsDist( box_pt[1], box_pt[3] );
-
-        double x0 = (box_pt[0].x + box_pt[2].x)*0.5;
-        double y0 = (box_pt[0].y + box_pt[2].y)*0.5;
-        double x1 = (box_pt[1].x + box_pt[3].x)*0.5;
-        double y1 = (box_pt[1].y + box_pt[3].y)*0.5;
-
-        if( fabs(d0 - d1) + fabs(x0 - x1) + fabs(y0 - y1) > eps*MAX(d0,d1) )
-        {
-            ts->printf( cvtest::TS::LOG, "The bounding box is not a rectangle\n" );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-            goto _exit_;
-        }
-    }
-
-#if 0
-    {
-    int n = 4;
-    double a = 8, c = 8, b = 100, d = 150;
-    CvPoint bp[4], *bpp = bp;
-    cvNamedWindow( "test", 1 );
-    IplImage* img = cvCreateImage( cvSize(500,500), 8, 3 );
-    cvZero(img);
-    for( i = 0; i < point_count; i++ )
-        cvCircle(img,cvPoint(cvRound(p[i].x*a+b),cvRound(p[i].y*c+d)), 3, CV_RGB(0,255,0), -1 );
-    for( i = 0; i < n; i++ )
-        bp[i] = cvPoint(cvRound(box_pt[i].x*a+b),cvRound(box_pt[i].y*c+d));
-    cvPolyLine( img, &bpp, &n, 1, 1, CV_RGB(255,255,0), 1, cv::LINE_AA, 0 );
-    cvShowImage( "test", img );
-    cvWaitKey();
-    cvReleaseImage(&img);
-    }
-#endif
-
-    // check that the box includes all the points
-    // and there is at least one point at (or very close to) every box side
-    for( i = 0; i < point_count; i++ )
-    {
-        int idx = 0, on_edge = 0;
-        double pptresult = cvTsPointPolygonTest( p[i], box_pt, 4, &idx, &on_edge );
-        if( pptresult < -eps )
-        {
-            ts->printf( cvtest::TS::LOG, "The point #%d is outside of the box\n", i );
-            code = cvtest::TS::FAIL_BAD_ACCURACY;
-            goto _exit_;
-        }
-
-        if( pptresult < eps )
-        {
-            for( j = 0; j < 4; j++ )
-            {
-                double d = cvTsPtLineDist( p[i], box_pt[(j-1)&3], box_pt[j] );
-                if( d < eps )
-                    mask[j] = (uchar)1;
-            }
-        }
-    }
-
-    if( mask[0] + mask[1] + mask[2] + mask[3] != 4 )
-    {
-        ts->printf( cvtest::TS::LOG, "Not every box side has a point nearby\n" );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-_exit_:
-
-    if( code < 0 )
-        ts->set_failed_test_info( code );
-    return code;
-}
-
-
-/****************************************************************************************\
-*                                   MinEnclosingTriangle Test                            *
-\****************************************************************************************/
-
-class CV_MinTriangleTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_MinTriangleTest();
-
-protected:
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    std::vector<cv::Point2f> getTriangleMiddlePoints();
-
-    std::vector<cv::Point2f> convexPolygon;
-    std::vector<cv::Point2f> triangle;
-};
-
-
-CV_MinTriangleTest::CV_MinTriangleTest()
-{
-}
-
-std::vector<cv::Point2f> CV_MinTriangleTest::getTriangleMiddlePoints()
-{
-    std::vector<cv::Point2f> triangleMiddlePoints;
-
-    for (int i = 0; i < 3; i++) {
-        triangleMiddlePoints.push_back(cvTsMiddlePoint(triangle[i], triangle[(i + 1) % 3]));
-    }
-
-    return triangleMiddlePoints;
-}
-
-
-void CV_MinTriangleTest::run_func()
-{
-    std::vector<cv::Point2f> pointsAsVector;
-
-    cv::cvarrToMat(points).convertTo(pointsAsVector, CV_32F);
-
-    cv::minEnclosingTriangle(pointsAsVector, triangle);
-    cv::convexHull(pointsAsVector, convexPolygon, true, true);
-}
-
-
-int CV_MinTriangleTest::validate_test_results( int test_case_idx )
-{
-    bool errorEnclosed = false, errorMiddlePoints = false, errorFlush = true;
-    double eps = 1e-4;
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-
-#if 0
-    {
-    int n = 3;
-    double a = 8, c = 8, b = 100, d = 150;
-    CvPoint bp[4], *bpp = bp;
-    cvNamedWindow( "test", 1 );
-    IplImage* img = cvCreateImage( cvSize(500,500), 8, 3 );
-    cvZero(img);
-    for( i = 0; i < point_count; i++ )
-        cvCircle(img,cvPoint(cvRound(p[i].x*a+b),cvRound(p[i].y*c+d)), 3, CV_RGB(0,255,0), -1 );
-    for( i = 0; i < n; i++ )
-        bp[i] = cvPoint(cvRound(triangle[i].x*a+b),cvRound(triangle[i].y*c+d));
-    cvPolyLine( img, &bpp, &n, 1, 1, CV_RGB(255,255,0), 1, cv::LINE_AA, 0 );
-    cvShowImage( "test", img );
-    cvWaitKey();
-    cvReleaseImage(&img);
-    }
-#endif
-
-    int polygonVertices = (int) convexPolygon.size();
-
-    if (polygonVertices > 2) {
-        // Check if all points are enclosed by the triangle
-        for (int i = 0; (i < polygonVertices) && (!errorEnclosed); i++)
-        {
-            if (cv::pointPolygonTest(triangle, cv::Point2f(convexPolygon[i].x, convexPolygon[i].y), true) < (-eps))
-                errorEnclosed = true;
-        }
-
-        // Check if triangle edges middle points touch the polygon
-        std::vector<cv::Point2f> middlePoints = getTriangleMiddlePoints();
-
-        for (int i = 0; (i < 3) && (!errorMiddlePoints); i++)
-        {
-            bool isTouching = false;
-
-            for (int j = 0; (j < polygonVertices) && (!isTouching); j++)
-            {
-                if (cvTsIsPointOnLineSegment(middlePoints[i], convexPolygon[j],
-                                             convexPolygon[(j + 1) % polygonVertices]))
-                    isTouching = true;
-            }
-
-            errorMiddlePoints = (isTouching) ? false : true;
-        }
-
-        // Check if at least one of the edges is flush
-        for (int i = 0; (i < 3) && (errorFlush); i++)
-        {
-            for (int j = 0; (j < polygonVertices) && (errorFlush); j++)
-            {
-                if ((cvTsIsPointOnLineSegment(convexPolygon[j], triangle[i],
-                                              triangle[(i + 1) % 3])) &&
-                    (cvTsIsPointOnLineSegment(convexPolygon[(j + 1) % polygonVertices], triangle[i],
-                                              triangle[(i + 1) % 3])))
-                    errorFlush = false;
-            }
-        }
-
-        // Report any found errors
-        if (errorEnclosed)
-        {
-            ts->printf( cvtest::TS::LOG,
-            "All points should be enclosed by the triangle.\n" );
-            code = cvtest::TS::FAIL_BAD_ACCURACY;
-        }
-        else if (errorMiddlePoints)
-        {
-            ts->printf( cvtest::TS::LOG,
-            "All triangle edges middle points should touch the convex hull of the points.\n" );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-        }
-        else if (errorFlush)
-        {
-            ts->printf( cvtest::TS::LOG,
-            "At least one edge of the enclosing triangle should be flush with one edge of the polygon.\n" );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-        }
-    }
-
-    if ( code < 0 )
-        ts->set_failed_test_info( code );
-
-    return code;
-}
-
-
-/****************************************************************************************\
-*                                     MinEnclosingCircle Test                            *
-\****************************************************************************************/
-
-class CV_MinCircleTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_MinCircleTest();
-
-protected:
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-
-    Point2f center;
-    float radius;
-};
-
-
-CV_MinCircleTest::CV_MinCircleTest()
-{
-}
-
-
-void CV_MinCircleTest::run_func()
-{
-    if(!test_cpp)
-    {
-        CvPoint2D32f c_center = cvPoint2D32f(center);
-        cvMinEnclosingCircle( points, &c_center, &radius );
-        center = c_center;
-    }
-    else
-    {
-        cv::Point2f tmpcenter;
-        cv::minEnclosingCircle(cv::cvarrToMat(points), tmpcenter, radius);
-        center = tmpcenter;
-    }
-}
-
-
-int CV_MinCircleTest::validate_test_results( int test_case_idx )
-{
-    double eps = 1.03;
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    int i, j = 0, point_count = points2->rows + points2->cols - 1;
-    Point2f *p = (Point2f*)(points2->data.ptr);
-    Point2f v[3];
-
-#if 0
-    {
-    double a = 2, b = 200, d = 400;
-    cvNamedWindow( "test", 1 );
-    IplImage* img = cvCreateImage( cvSize(500,500), 8, 3 );
-    cvZero(img);
-    for( i = 0; i < point_count; i++ )
-        cvCircle(img,cvPoint(cvRound(p[i].x*a+b),cvRound(p[i].y*a+d)), 3, CV_RGB(0,255,0), -1 );
-    cvCircle( img, cvPoint(cvRound(center.x*a+b),cvRound(center.y*a+d)),
-              cvRound(radius*a), CV_RGB(255,255,0), 1 );
-    cvShowImage( "test", img );
-    cvWaitKey();
-    cvReleaseImage(&img);
-    }
-#endif
-
-    // check that the circle contains all the points inside and
-    // remember at most 3 points that are close to the boundary
-    for( i = 0; i < point_count; i++ )
-    {
-        double d = cvTsDist(p[i], center);
-        if( d > radius )
-        {
-            ts->printf( cvtest::TS::LOG, "The point #%d is outside of the circle\n", i );
-            code = cvtest::TS::FAIL_BAD_ACCURACY;
-            goto _exit_;
-        }
-
-        if( radius - d < eps*radius && j < 3 )
-            v[j++] = p[i];
-    }
-
-    if( point_count >= 2 && (j < 2 || (j == 2 && cvTsDist(v[0],v[1]) < (radius-1)*2/eps)) )
-    {
-        ts->printf( cvtest::TS::LOG,
-            "There should be at least 3 points near the circle boundary or 2 points on the diameter\n" );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-_exit_:
-
-    if( code < 0 )
-        ts->set_failed_test_info( code );
-    return code;
-}
-
-/****************************************************************************************\
-*                                 MinEnclosingCircle Test 2                              *
-\****************************************************************************************/
-
-class CV_MinCircleTest2 : public CV_BaseShapeDescrTest
-{
-public:
-    CV_MinCircleTest2();
-protected:
-    RNG rng;
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    float delta;
-};
-
-
-CV_MinCircleTest2::CV_MinCircleTest2()
-{
-    rng = ts->get_rng();
-}
-
-
-void CV_MinCircleTest2::run_func()
-{
-    Point2f center = Point2f(rng.uniform(0.0f, 1000.0f), rng.uniform(0.0f, 1000.0f));;
-    float radius = rng.uniform(0.0f, 500.0f);
-    float angle = (float)rng.uniform(0.0f, (float)(CV_2PI));
-    vector<Point2f> pts;
-    pts.push_back(center + Point2f(radius * cos(angle), radius * sin(angle)));
-    angle += (float)CV_PI;
-    pts.push_back(center + Point2f(radius * cos(angle), radius * sin(angle)));
-    float radius2 = radius * radius;
-    float x = rng.uniform(center.x - radius, center.x + radius);
-    float deltaX = x - center.x;
-    float upperBoundY = sqrt(radius2 - deltaX * deltaX);
-    float y = rng.uniform(center.y - upperBoundY, center.y + upperBoundY);
-    pts.push_back(Point2f(x, y));
-    // Find the minimum area enclosing circle
-    Point2f calcCenter;
-    float calcRadius;
-    minEnclosingCircle(pts, calcCenter, calcRadius);
-    delta = (float)cv::norm(calcCenter - center) + abs(calcRadius - radius);
-}
-
-int CV_MinCircleTest2::validate_test_results( int test_case_idx )
-{
-    float eps = 1.0F;
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    if (delta > eps)
-    {
-        ts->printf( cvtest::TS::LOG, "Delta center and calcCenter > %f\n", eps );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        ts->set_failed_test_info( code );
-    }
-    return code;
-}
-
-/****************************************************************************************\
-*                                 minEnclosingCircle Test 3                              *
-\****************************************************************************************/
-
-TEST(Imgproc_minEnclosingCircle, basic_test)
-{
-    vector<Point2f> pts;
-    pts.push_back(Point2f(0, 0));
-    pts.push_back(Point2f(10, 0));
-    pts.push_back(Point2f(5, 1));
-    const float EPS = 1.0e-3f;
-    Point2f center;
-    float radius;
-
-    // pts[2] is within the circle with diameter pts[0] - pts[1].
-    //        2
-    // 0             1
-    // NB: The triangle is obtuse, so the only pts[0] and pts[1] are on the circle.
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 5, EPS);
-    EXPECT_NEAR(center.y, 0, EPS);
-    EXPECT_NEAR(5, radius, EPS);
-
-    // pts[2] is on the circle with diameter pts[0] - pts[1].
-    //  2
-    // 0 1
-    pts[2] = Point2f(5, 5);
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 5, EPS);
-    EXPECT_NEAR(center.y, 0, EPS);
-    EXPECT_NEAR(5, radius, EPS);
-
-    // pts[2] is outside the circle with diameter pts[0] - pts[1].
-    //   2
-    //
-    //
-    // 0   1
-    // NB: The triangle is acute, so all 3 points are on the circle.
-    pts[2] = Point2f(5, 10);
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 5, EPS);
-    EXPECT_NEAR(center.y, 3.75, EPS);
-    EXPECT_NEAR(6.25f, radius, EPS);
-
-    // The 3 points are colinear.
-    pts[2] = Point2f(3, 0);
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 5, EPS);
-    EXPECT_NEAR(center.y, 0, EPS);
-    EXPECT_NEAR(5, radius, EPS);
-
-    // 2 points are the same.
-    pts[2] = pts[1];
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 5, EPS);
-    EXPECT_NEAR(center.y, 0, EPS);
-    EXPECT_NEAR(5, radius, EPS);
-
-    // 3 points are the same.
-    pts[0] = pts[1];
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 10, EPS);
-    EXPECT_NEAR(center.y, 0, EPS);
-    EXPECT_NEAR(0, radius, EPS);
-}
-
-TEST(Imgproc_minEnclosingCircle, regression_16051) {
-    vector<Point2f> pts;
-    pts.push_back(Point2f(85, 1415));
-    pts.push_back(Point2f(87, 1415));
-    pts.push_back(Point2f(89, 1414));
-    pts.push_back(Point2f(89, 1414));
-    pts.push_back(Point2f(87, 1412));
-    Point2f center;
-    float radius;
-    minEnclosingCircle(pts, center, radius);
-    EXPECT_NEAR(center.x, 86.9f, 1e-3);
-    EXPECT_NEAR(center.y, 1414.1f, 1e-3);
-    EXPECT_NEAR(2.1024551f, radius, 1e-3);
-}
-
-/****************************************************************************************\
-*                                   Perimeter Test                                     *
-\****************************************************************************************/
-
-class CV_PerimeterTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_PerimeterTest();
-
-protected:
-    int prepare_test_case( int test_case_idx );
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    CvSlice slice;
-    int is_closed;
-    double result;
-};
-
-
-CV_PerimeterTest::CV_PerimeterTest()
-{
-}
-
-
-int CV_PerimeterTest::prepare_test_case( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::prepare_test_case( test_case_idx );
-    RNG& rng = ts->get_rng();
-    int total;
-
-    if( code < 0 )
-        return code;
-
-    is_closed = cvtest::randInt(rng) % 2;
-
-    if( points1 )
-    {
-        points1->flags |= CV_SEQ_KIND_CURVE;
-        if( is_closed )
-            points1->flags |= CV_SEQ_FLAG_CLOSED;
-        total = points1->total;
-    }
-    else
-        total = points2->cols + points2->rows - 1;
-
-    if( (cvtest::randInt(rng) % 3) && !test_cpp )
-    {
-        slice.start_index = cvtest::randInt(rng) % total;
-        slice.end_index = cvtest::randInt(rng) % total;
-    }
-    else
-        slice = CV_WHOLE_SEQ;
-
-    return 1;
-}
-
-
-void CV_PerimeterTest::run_func()
-{
-    if(!test_cpp)
-        result = cvArcLength( points, slice, points1 ? -1 : is_closed );
-    else
-        result = cv::arcLength(cv::cvarrToMat(points),
-            !points1 ? is_closed != 0 : (points1->flags & CV_SEQ_FLAG_CLOSED) != 0);
-}
-
-
-int CV_PerimeterTest::validate_test_results( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    int i, len = slice.end_index - slice.start_index, total = points2->cols + points2->rows - 1;
-    double result0 = 0;
-    Point2f prev_pt, pt;
-    CvPoint2D32f *ptr;
-
-    if( len < 0 )
-        len += total;
-
-    len = MIN( len, total );
-    //len -= !is_closed && len == total;
-
-    ptr = (CvPoint2D32f*)points2->data.fl;
-    prev_pt = ptr[(is_closed ? slice.start_index+len-1 : slice.start_index) % total];
-
-    for( i = 0; i < len + (len < total && (!is_closed || len==1)); i++ )
-    {
-        pt = ptr[(i + slice.start_index) % total];
-        double dx = pt.x - prev_pt.x, dy = pt.y - prev_pt.y;
-        result0 += sqrt(dx*dx + dy*dy);
-        prev_pt = pt;
-    }
-
-    if( cvIsNaN(result) || cvIsInf(result) )
-    {
-        ts->printf( cvtest::TS::LOG, "cvArcLength() returned invalid value (%g)\n", result );
-        code = cvtest::TS::FAIL_INVALID_OUTPUT;
-    }
-    else if( fabs(result - result0) > FLT_EPSILON*100*result0 )
-    {
-        ts->printf( cvtest::TS::LOG, "The function returned %g, while the correct result is %g\n", result, result0 );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-    }
-
-    if( code < 0 )
-        ts->set_failed_test_info( code );
-    return code;
-}
-
-
-/****************************************************************************************\
-*                                   FitEllipse Test                                      *
-\****************************************************************************************/
-
-class CV_FitEllipseTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_FitEllipseTest();
-
-protected:
-    int prepare_test_case( int test_case_idx );
-    void generate_point_set( void* points );
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    RotatedRect box0, box;
-    double min_ellipse_size, max_noise;
-};
-
-
-CV_FitEllipseTest::CV_FitEllipseTest()
-{
-    min_log_size = 5; // for robust ellipse fitting a dozen of points is needed at least
-    max_log_size = 10;
-    min_ellipse_size = 10;
-    max_noise = 0.05;
-}
-
-
-void CV_FitEllipseTest::generate_point_set( void* pointsSet )
-{
-    RNG& rng = ts->get_rng();
-    int i, total, point_type;
-    CvSeqReader reader;
-    uchar* data = 0;
-    double a, b;
-
-    box0.center.x = (float)((low.val[0] + high.val[0])*0.5);
-    box0.center.y = (float)((low.val[1] + high.val[1])*0.5);
-    box0.size.width = (float)(MAX(high.val[0] - low.val[0], min_ellipse_size)*2);
-    box0.size.height = (float)(MAX(high.val[1] - low.val[1], min_ellipse_size)*2);
-    box0.angle = (float)(cvtest::randReal(rng)*180);
-    a = cos(box0.angle*CV_PI/180.);
-    b = sin(box0.angle*CV_PI/180.);
-
-    if( box0.size.width > box0.size.height )
-    {
-        float t;
-        CV_SWAP( box0.size.width, box0.size.height, t );
-    }
-    memset( &reader, 0, sizeof(reader) );
-
-    if( CV_IS_SEQ(pointsSet) )
-    {
-        CvSeq* ptseq = (CvSeq*)pointsSet;
-        total = ptseq->total;
-        point_type = CV_SEQ_ELTYPE(ptseq);
-        cvStartReadSeq( ptseq, &reader );
-    }
-    else
-    {
-        CvMat* ptm = (CvMat*)pointsSet;
-        CV_Assert( CV_IS_MAT(ptm) && CV_IS_MAT_CONT(ptm->type) );
-        total = ptm->rows + ptm->cols - 1;
-        point_type = CV_MAT_TYPE(ptm->type);
-        data = ptm->data.ptr;
-    }
-
-    CV_Assert(point_type == CV_32SC2 || point_type == CV_32FC2);
-
-    for( i = 0; i < total; i++ )
-    {
-        CvPoint* pp;
-        CvPoint2D32f p = {0, 0};
-        double angle = cvtest::randReal(rng)*CV_PI*2;
-        double x = box0.size.height*0.5*(cos(angle) + (cvtest::randReal(rng)-0.5)*2*max_noise);
-        double y = box0.size.width*0.5*(sin(angle) + (cvtest::randReal(rng)-0.5)*2*max_noise);
-        p.x = (float)(box0.center.x + a*x + b*y);
-        p.y = (float)(box0.center.y - b*x + a*y);
-
-        if( reader.ptr )
-        {
-            pp = (CvPoint*)reader.ptr;
-            CV_NEXT_SEQ_ELEM( sizeof(*pp), reader );
-        }
-        else
-            pp = ((CvPoint*)data) + i;
-        if( point_type == CV_32SC2 )
-        {
-            pp->x = cvRound(p.x);
-            pp->y = cvRound(p.y);
-        }
-        else
-            *(CvPoint2D32f*)pp = p;
-    }
-}
-
-
-int CV_FitEllipseTest::prepare_test_case( int test_case_idx )
-{
-    min_log_size = MAX(min_log_size,4);
-    max_log_size = MAX(min_log_size,max_log_size);
-    return CV_BaseShapeDescrTest::prepare_test_case( test_case_idx );
-}
-
-
-void CV_FitEllipseTest::run_func()
-{
-    if(!test_cpp)
-        box = cvFitEllipse2( points );
-    else
-        box = cv::fitEllipse(cv::cvarrToMat(points));
-}
-
-int CV_FitEllipseTest::validate_test_results( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    double diff_angle;
-
-    if( cvIsNaN(box.center.x) || cvIsInf(box.center.x) ||
-        cvIsNaN(box.center.y) || cvIsInf(box.center.y) ||
-        cvIsNaN(box.size.width) || cvIsInf(box.size.width) ||
-        cvIsNaN(box.size.height) || cvIsInf(box.size.height) ||
-        cvIsNaN(box.angle) || cvIsInf(box.angle) )
-    {
-        ts->printf( cvtest::TS::LOG, "Some of the computed ellipse parameters are invalid (x=%g,y=%g,w=%g,h=%g,angle=%g)\n",
-            box.center.x, box.center.y, box.size.width, box.size.height, box.angle );
-        code = cvtest::TS::FAIL_INVALID_OUTPUT;
-        goto _exit_;
-    }
-
-    box.angle = (float)(90-box.angle);
-    if( box.angle < 0 )
-        box.angle += 360;
-    if( box.angle > 360 )
-        box.angle -= 360;
-
-    if( fabs(box.center.x - box0.center.x) > 3 ||
-        fabs(box.center.y - box0.center.y) > 3 ||
-        fabs(box.size.width - box0.size.width) > 0.1*fabs(box0.size.width) ||
-        fabs(box.size.height - box0.size.height) > 0.1*fabs(box0.size.height) )
-    {
-        ts->printf( cvtest::TS::LOG, "The computed ellipse center and/or size are incorrect:\n\t"
-            "(x=%.1f,y=%.1f,w=%.1f,h=%.1f), while it should be (x=%.1f,y=%.1f,w=%.1f,h=%.1f)\n",
-            box.center.x, box.center.y, box.size.width, box.size.height,
-            box0.center.x, box0.center.y, box0.size.width, box0.size.height );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-    diff_angle = fabs(box0.angle - box.angle);
-    diff_angle = MIN( diff_angle, fabs(diff_angle - 360));
-    diff_angle = MIN( diff_angle, fabs(diff_angle - 180));
-
-    if( box0.size.height >= 1.3*box0.size.width && diff_angle > 30 )
-    {
-        ts->printf( cvtest::TS::LOG, "Incorrect ellipse angle (=%1.f, should be %1.f)\n",
-            box.angle, box0.angle );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-_exit_:
-
-#if 0
-    if( code < 0 )
-    {
-    cvNamedWindow( "test", 0 );
-    IplImage* img = cvCreateImage( cvSize(cvRound(low_high_range*4),
-        cvRound(low_high_range*4)), 8, 3 );
-    cvZero( img );
-
-    box.center.x += (float)low_high_range*2;
-    box.center.y += (float)low_high_range*2;
-    cvEllipseBox( img, box, CV_RGB(255,0,0), 3, 8 );
-
-    for( int i = 0; i < points2->rows + points2->cols - 1; i++ )
-    {
-        CvPoint pt;
-        pt.x = cvRound(points2->data.fl[i*2] + low_high_range*2);
-        pt.y = cvRound(points2->data.fl[i*2+1] + low_high_range*2);
-        cvCircle( img, pt, 1, CV_RGB(255,255,255), -1, 8 );
-    }
-
-    cvShowImage( "test", img );
-    cvReleaseImage( &img );
-    cvWaitKey(0);
-    }
-#endif
-
-    if( code < 0 )
-    {
-        ts->set_failed_test_info( code );
-    }
-    return code;
-}
-
-
-class CV_FitEllipseSmallTest : public cvtest::BaseTest
-{
-public:
-    CV_FitEllipseSmallTest() {}
-    ~CV_FitEllipseSmallTest() {}
-protected:
-    void run(int)
-    {
-        Size sz(50, 50);
-        vector<vector<Point> > c;
-        c.push_back(vector<Point>());
-        int scale = 1;
-        Point ofs = Point(0,0);//sz.width/2, sz.height/2) - Point(4,4)*scale;
-        c[0].push_back(Point(2, 0)*scale+ofs);
-        c[0].push_back(Point(0, 2)*scale+ofs);
-        c[0].push_back(Point(0, 6)*scale+ofs);
-        c[0].push_back(Point(2, 8)*scale+ofs);
-        c[0].push_back(Point(6, 8)*scale+ofs);
-        c[0].push_back(Point(8, 6)*scale+ofs);
-        c[0].push_back(Point(8, 2)*scale+ofs);
-        c[0].push_back(Point(6, 0)*scale+ofs);
-
-        RotatedRect e = fitEllipse(c[0]);
-        CV_Assert( fabs(e.center.x - 4) <= 1. &&
-                   fabs(e.center.y - 4) <= 1. &&
-                   fabs(e.size.width - 9) <= 1. &&
-                   fabs(e.size.height - 9) <= 1. );
-    }
-};
-
-
-// Regression test for incorrect fitEllipse result reported in Bug #3989
-// Check edge cases for rotation angles of ellipse ([-180, 90, 0, 90, 180] degrees)
-class CV_FitEllipseParallelTest : public CV_FitEllipseTest
-{
-public:
-    CV_FitEllipseParallelTest();
-    ~CV_FitEllipseParallelTest();
-protected:
-    void generate_point_set( void* points );
-    void run_func(void);
-    Mat pointsMat;
-};
-
-CV_FitEllipseParallelTest::CV_FitEllipseParallelTest()
-{
-    min_ellipse_size = 5;
-}
-
-void CV_FitEllipseParallelTest::generate_point_set( void* )
-{
-    RNG& rng = ts->get_rng();
-    int height = (int)(MAX(high.val[0] - low.val[0], min_ellipse_size));
-    int width = (int)(MAX(high.val[1] - low.val[1], min_ellipse_size));
-    const int angle = ( (cvtest::randInt(rng) % 5) - 2 ) * 90;
-    const int dim = max(height, width);
-    const Point center = Point(dim*2, dim*2);
-
-    if( width > height )
-    {
-        int t;
-        CV_SWAP( width, height, t );
-    }
-
-    Mat image = Mat::zeros(dim*4, dim*4, CV_8UC1);
-    ellipse(image, center, Size(height, width), angle,
-            0, 360, Scalar(255, 0, 0), 1, 8);
-
-    box0.center.x = (float)center.x;
-    box0.center.y = (float)center.y;
-    box0.size.width = (float)width*2;
-    box0.size.height = (float)height*2;
-    box0.angle = (float)angle;
-
-    vector<vector<Point> > contours;
-    findContours(image, contours,  RETR_EXTERNAL,  CHAIN_APPROX_NONE);
-    Mat(contours[0]).convertTo(pointsMat, CV_32F);
-}
-
-void CV_FitEllipseParallelTest::run_func()
-{
-    box = cv::fitEllipse(pointsMat);
-}
-
-CV_FitEllipseParallelTest::~CV_FitEllipseParallelTest(){
-    pointsMat.release();
-}
-
-/****************************************************************************************\
-*                                   FitLine Test                                         *
-\****************************************************************************************/
-
-class CV_FitLineTest : public CV_BaseShapeDescrTest
-{
-public:
-    CV_FitLineTest();
-
-protected:
-    int prepare_test_case( int test_case_idx );
-    void generate_point_set( void* points );
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    double max_noise;
-    AutoBuffer<float> line, line0;
-    int dist_type;
-    double reps, aeps;
-};
-
-
-CV_FitLineTest::CV_FitLineTest()
-{
-    min_log_size = 5; // for robust line fitting a dozen of points is needed at least
-    max_log_size = 10;
-    max_noise = 0.05;
-}
-
-void CV_FitLineTest::generate_point_set( void* pointsSet )
-{
-    RNG& rng = ts->get_rng();
-    int i, k, n, total, point_type;
-    CvSeqReader reader;
-    uchar* data = 0;
-    double s = 0;
-
-    n = dims;
-    for( k = 0; k < n; k++ )
-    {
-        line0[k+n] = (float)((low.val[k] + high.val[k])*0.5);
-        line0[k] = (float)(high.val[k] - low.val[k]);
-        if( cvtest::randInt(rng) % 2 )
-            line0[k] = -line0[k];
-        s += (double)line0[k]*line0[k];
-    }
-
-    s = 1./sqrt(s);
-    for( k = 0; k < n; k++ )
-        line0[k] = (float)(line0[k]*s);
-
-    memset( &reader, 0, sizeof(reader) );
-
-    if( CV_IS_SEQ(pointsSet) )
-    {
-        CvSeq* ptseq = (CvSeq*)pointsSet;
-        total = ptseq->total;
-        point_type = CV_MAT_DEPTH(CV_SEQ_ELTYPE(ptseq));
-        cvStartReadSeq( ptseq, &reader );
-    }
-    else
-    {
-        CvMat* ptm = (CvMat*)pointsSet;
-        CV_Assert( CV_IS_MAT(ptm) && CV_IS_MAT_CONT(ptm->type) );
-        total = ptm->rows + ptm->cols - 1;
-        point_type = CV_MAT_DEPTH(CV_MAT_TYPE(ptm->type));
-        data = ptm->data.ptr;
-    }
-
-    for( i = 0; i < total; i++ )
-    {
-        int* pi;
-        float* pf;
-        float p[4], t;
-        if( reader.ptr )
-        {
-            pi = (int*)reader.ptr;
-            pf = (float*)reader.ptr;
-            CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
-        }
-        else
-        {
-            pi = (int*)data + i*n;
-            pf = (float*)data + i*n;
-        }
-
-        t = (float)((cvtest::randReal(rng)-0.5)*low_high_range*2);
-
-        for( k = 0; k < n; k++ )
-        {
-            p[k] = (float)((cvtest::randReal(rng)-0.5)*max_noise*2 + t*line0[k] + line0[k+n]);
-
-            if( point_type == CV_32S )
-                pi[k] = cvRound(p[k]);
-            else
-                pf[k] = p[k];
-        }
-    }
-}
-
-int CV_FitLineTest::prepare_test_case( int test_case_idx )
-{
-    RNG& rng = ts->get_rng();
-    dims = cvtest::randInt(rng) % 2 + 2;
-    line.allocate(dims * 2);
-    line0.allocate(dims * 2);
-    min_log_size = MAX(min_log_size,5);
-    max_log_size = MAX(min_log_size,max_log_size);
-    int code = CV_BaseShapeDescrTest::prepare_test_case( test_case_idx );
-    dist_type = cvtest::randInt(rng) % 6 + 1;
-    dist_type += dist_type == CV_DIST_C;
-    reps = 0.1; aeps = 0.01;
-    return code;
-}
-
-
-void CV_FitLineTest::run_func()
-{
-    if(!test_cpp)
-        cvFitLine( points, dist_type, 0, reps, aeps, line.data());
-    else if(dims == 2)
-        cv::fitLine(cv::cvarrToMat(points), (cv::Vec4f&)line[0], dist_type, 0, reps, aeps);
-    else
-        cv::fitLine(cv::cvarrToMat(points), (cv::Vec6f&)line[0], dist_type, 0, reps, aeps);
-}
-
-int CV_FitLineTest::validate_test_results( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    int k, max_k = 0;
-    double vec_diff = 0, t;
-
-    //std::cout << dims << " " << Mat(1, dims*2, CV_32FC1, line.data()) << " " << Mat(1, dims, CV_32FC1, line0.data()) << std::endl;
-
-    for( k = 0; k < dims*2; k++ )
-    {
-        if( cvIsNaN(line[k]) || cvIsInf(line[k]) )
-        {
-            ts->printf( cvtest::TS::LOG, "Some of the computed line parameters are invalid (line[%d]=%g)\n",
-                k, line[k] );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-            goto _exit_;
-        }
-    }
-
-    if( fabs(line0[1]) > fabs(line0[0]) )
-        max_k = 1;
-    if( fabs(line0[dims-1]) > fabs(line0[max_k]) )
-        max_k = dims-1;
-    if( line0[max_k] < 0 )
-        for( k = 0; k < dims; k++ )
-            line0[k] = -line0[k];
-    if( line[max_k] < 0 )
-        for( k = 0; k < dims; k++ )
-            line[k] = -line[k];
-
-    for( k = 0; k < dims; k++ )
-    {
-        double dt = line[k] - line0[k];
-        vec_diff += dt*dt;
-    }
-
-    if( sqrt(vec_diff) > 0.05 )
-    {
-        if( dims == 2 )
-            ts->printf( cvtest::TS::LOG,
-                "The computed line vector (%.2f,%.2f) is different from the actual (%.2f,%.2f)\n",
-                line[0], line[1], line0[0], line0[1] );
-        else
-            ts->printf( cvtest::TS::LOG,
-                "The computed line vector (%.2f,%.2f,%.2f) is different from the actual (%.2f,%.2f,%.2f)\n",
-                line[0], line[1], line[2], line0[0], line0[1], line0[2] );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-    t = (line[max_k+dims] - line0[max_k+dims])/line0[max_k];
-    for( k = 0; k < dims; k++ )
-    {
-        double p = line0[k+dims] + t*line0[k] - line[k+dims];
-        vec_diff += p*p;
-    }
-
-    if( sqrt(vec_diff) > 1*MAX(fabs(t),1) )
-    {
-        if( dims == 2 )
-            ts->printf( cvtest::TS::LOG,
-                "The computed line point (%.2f,%.2f) is too far from the actual line\n",
-                line[2]+line0[2], line[3]+line0[3] );
-        else
-            ts->printf( cvtest::TS::LOG,
-                "The computed line point (%.2f,%.2f,%.2f) is too far from the actual line\n",
-                line[3]+line0[3], line[4]+line0[4], line[5]+line0[5] );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-        goto _exit_;
-    }
-
-_exit_:
-
-    if( code < 0 )
-    {
-        ts->set_failed_test_info( code );
-    }
-    return code;
-}
-
 /****************************************************************************************\
-*                                   ContourMoments Test                                  *
+*                                 minEnclosingCircle Test 3                              *
 \****************************************************************************************/
 
-
-static void
-cvTsGenerateTousledBlob( CvPoint2D32f center, CvSize2D32f axes,
-    double max_r_scale, double angle, CvArr* points, RNG& rng )
-{
-    int i, total, point_type;
-    uchar* data = 0;
-    CvSeqReader reader;
-    memset( &reader, 0, sizeof(reader) );
-
-    if( CV_IS_SEQ(points) )
-    {
-        CvSeq* ptseq = (CvSeq*)points;
-        total = ptseq->total;
-        point_type = CV_SEQ_ELTYPE(ptseq);
-        cvStartReadSeq( ptseq, &reader );
-    }
-    else
-    {
-        CvMat* ptm = (CvMat*)points;
-        CV_Assert( CV_IS_MAT(ptm) && CV_IS_MAT_CONT(ptm->type) );
-        total = ptm->rows + ptm->cols - 1;
-        point_type = CV_MAT_TYPE(ptm->type);
-        data = ptm->data.ptr;
-    }
-
-    CV_Assert( point_type == CV_32SC2 || point_type == CV_32FC2 );
-
-    for( i = 0; i < total; i++ )
-    {
-        CvPoint* pp;
-        Point2f p;
-
-        double phi0 = 2*CV_PI*i/total;
-        double phi = CV_PI*angle/180.;
-        double t = cvtest::randReal(rng)*max_r_scale + (1 - max_r_scale);
-        double ta = axes.height*t;
-        double tb = axes.width*t;
-        double c0 = cos(phi0)*ta, s0 = sin(phi0)*tb;
-        double c = cos(phi), s = sin(phi);
-        p.x = (float)(c0*c - s0*s + center.x);
-        p.y = (float)(c0*s + s0*c + center.y);
-
-        if( reader.ptr )
-        {
-            pp = (CvPoint*)reader.ptr;
-            CV_NEXT_SEQ_ELEM( sizeof(*pp), reader );
-        }
-        else
-            pp = ((CvPoint*)data) + i;
-
-        if( point_type == CV_32SC2 )
-        {
-            pp->x = cvRound(p.x);
-            pp->y = cvRound(p.y);
-        }
-        else
-            *(CvPoint2D32f*)pp = cvPoint2D32f(p);
-    }
-}
-
-
-class CV_ContourMomentsTest : public CV_BaseShapeDescrTest
+TEST(minEnclosingCircle, basic_test)
 {
-public:
-    CV_ContourMomentsTest();
-
-protected:
-    int prepare_test_case( int test_case_idx );
-    void generate_point_set( void* points );
-    void run_func(void);
-    int validate_test_results( int test_case_idx );
-    CvMoments moments0, moments;
-    double area0, area;
-    Size2f axes;
+    vector<Point2f> pts;
+    pts.push_back(Point2f(0, 0));
+    pts.push_back(Point2f(10, 0));
+    pts.push_back(Point2f(5, 1));
+    const float EPS = 1.0e-3f;
     Point2f center;
-    int max_max_r_scale;
-    double max_r_scale, angle;
-    Size img_size;
-};
-
-
-CV_ContourMomentsTest::CV_ContourMomentsTest()
-{
-    min_log_size = 3;
-    max_log_size = 8;
-    max_max_r_scale = 15;
-    low_high_range = 200;
-    enable_flt_points = false;
-}
-
-
-void CV_ContourMomentsTest::generate_point_set( void* pointsSet )
-{
-    RNG& rng = ts->get_rng();
-    float max_sz;
-
-    axes.width = (float)((cvtest::randReal(rng)*0.9 + 0.1)*low_high_range);
-    axes.height = (float)((cvtest::randReal(rng)*0.9 + 0.1)*low_high_range);
-    max_sz = MAX(axes.width, axes.height);
-
-    img_size.width = img_size.height = cvRound(low_high_range*2.2);
-
-    center.x = (float)(img_size.width*0.5 + (cvtest::randReal(rng)-0.5)*(img_size.width - max_sz*2)*0.8);
-    center.y = (float)(img_size.height*0.5 + (cvtest::randReal(rng)-0.5)*(img_size.height - max_sz*2)*0.8);
-
-    CV_Assert( 0 < center.x - max_sz && center.x + max_sz < img_size.width &&
-               0 < center.y - max_sz && center.y + max_sz < img_size.height );
-
-    max_r_scale = cvtest::randReal(rng)*max_max_r_scale*0.01;
-    angle = cvtest::randReal(rng)*360;
-
-    cvTsGenerateTousledBlob( cvPoint2D32f(center), cvSize2D32f(axes), max_r_scale, angle, pointsSet, rng );
-
-    if( points1 )
-        points1->flags = CV_SEQ_MAGIC_VAL + CV_SEQ_POLYGON;
-}
-
-
-int CV_ContourMomentsTest::prepare_test_case( int test_case_idx )
-{
-    min_log_size = MAX(min_log_size,3);
-    max_log_size = MIN(max_log_size,8);
-    max_log_size = MAX(min_log_size,max_log_size);
-    int code = CV_BaseShapeDescrTest::prepare_test_case( test_case_idx );
-    return code;
-}
-
-
-void CV_ContourMomentsTest::run_func()
-{
-    if(!test_cpp)
-    {
-        cvMoments( points, &moments );
-        area = cvContourArea( points );
-    }
-    else
-    {
-        moments = cvMoments(cv::moments(cv::cvarrToMat(points)));
-        area = cv::contourArea(cv::cvarrToMat(points));
-    }
-}
-
-
-int CV_ContourMomentsTest::validate_test_results( int test_case_idx )
-{
-    int code = CV_BaseShapeDescrTest::validate_test_results( test_case_idx );
-    int i, n = (int)(sizeof(moments)/sizeof(moments.inv_sqrt_m00));
-    CvMat* img = cvCreateMat( img_size.height, img_size.width, CV_8UC1 );
-    CvPoint* pt = (CvPoint*)points2->data.i;
-    int count = points2->cols + points2->rows - 1;
-    double max_v0 = 0;
-
-    cvZero(img);
-    cvFillPoly( img, &pt, &count, 1, cvScalarAll(1));
-    cvMoments( img, &moments0 );
-
-    for( i = 0; i < n; i++ )
-    {
-        double t = fabs((&moments0.m00)[i]);
-        max_v0 = MAX(max_v0, t);
-    }
-
-    for( i = 0; i <= n; i++ )
-    {
-        double v = i < n ? (&moments.m00)[i] : area;
-        double v0 = i < n ? (&moments0.m00)[i] : moments0.m00;
-
-        if( cvIsNaN(v) || cvIsInf(v) )
-        {
-            ts->printf( cvtest::TS::LOG,
-                "The contour %s is invalid (=%g)\n", i < n ? "moment" : "area", v );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-            break;
-        }
-
-        if( fabs(v - v0) > 0.1*max_v0 )
-        {
-            ts->printf( cvtest::TS::LOG,
-                "The computed contour %s is %g, while it should be %g\n",
-                i < n ? "moment" : "area", v, v0 );
-            code = cvtest::TS::FAIL_BAD_ACCURACY;
-            break;
-        }
-    }
-
-    if( code < 0 )
-    {
-        ts->set_failed_test_info( code );
-    }
-
-    cvReleaseMat( &img );
-    return code;
-}
-
+    float radius;
 
-////////////////////////////////////// Perimeter/Area/Slice test ///////////////////////////////////
+    // pts[2] is within the circle with diameter pts[0] - pts[1].
+    //        2
+    // 0             1
+    // NB: The triangle is obtuse, so the only pts[0] and pts[1] are on the circle.
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
 
-class CV_PerimeterAreaSliceTest : public cvtest::BaseTest
-{
-public:
-    CV_PerimeterAreaSliceTest();
-    ~CV_PerimeterAreaSliceTest();
-protected:
-    void run(int);
-};
+    // pts[2] is on the circle with diameter pts[0] - pts[1].
+    //  2
+    // 0 1
+    pts[2] = Point2f(5, 5);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
 
-CV_PerimeterAreaSliceTest::CV_PerimeterAreaSliceTest()
-{
-}
-CV_PerimeterAreaSliceTest::~CV_PerimeterAreaSliceTest() {}
+    // pts[2] is outside the circle with diameter pts[0] - pts[1].
+    //   2
+    //
+    //
+    // 0   1
+    // NB: The triangle is acute, so all 3 points are on the circle.
+    pts[2] = Point2f(5, 10);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 3.75, EPS);
+    EXPECT_NEAR(6.25f, radius, EPS);
 
-void CV_PerimeterAreaSliceTest::run( int )
-{
-    Ptr<CvMemStorage> storage(cvCreateMemStorage());
-    RNG& rng = theRNG();
-    const double min_r = 90, max_r = 120;
+    // The 3 points are colinear.
+    pts[2] = Point2f(3, 0);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
 
-    for( int i = 0; i < 100; i++ )
-    {
-        ts->update_context( this, i, true );
-        int n = rng.uniform(3, 30);
-        cvClearMemStorage(storage);
-        CvSeq* contour = cvCreateSeq(CV_SEQ_POLYGON, sizeof(CvSeq), sizeof(CvPoint), storage);
-        double dphi = CV_PI*2/n;
-        Point center;
-        center.x = rng.uniform(cvCeil(max_r), cvFloor(640-max_r));
-        center.y = rng.uniform(cvCeil(max_r), cvFloor(480-max_r));
-
-        for( int j = 0; j < n; j++ )
-        {
-            CvPoint pt = CV_STRUCT_INITIALIZER;
-            double r = rng.uniform(min_r, max_r);
-            double phi = j*dphi;
-            pt.x = cvRound(center.x + r*cos(phi));
-            pt.y = cvRound(center.y - r*sin(phi));
-            cvSeqPush(contour, &pt);
-        }
+    // 2 points are the same.
+    pts[2] = pts[1];
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
 
-        CvSlice slice = {0, 0};
-        for(;;)
-        {
-            slice.start_index = rng.uniform(-n/2, 3*n/2);
-            slice.end_index = rng.uniform(-n/2, 3*n/2);
-            int len = cvSliceLength(slice, contour);
-            if( len > 2 )
-                break;
-        }
-        CvSeq *cslice = cvSeqSlice(contour, slice);
-        /*printf( "%d. (%d, %d) of %d, length = %d, length1 = %d\n",
-               i, slice.start_index, slice.end_index,
-               contour->total, cvSliceLength(slice, contour), cslice->total );
-
-        double area0 = cvContourArea(cslice);
-        double area1 = cvContourArea(contour, slice);
-        if( area0 != area1 )
-        {
-            ts->printf(cvtest::TS::LOG,
-                       "The contour area slice is computed differently (%g vs %g)\n", area0, area1 );
-            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-            return;
-        }*/
-
-        double len0 = cvArcLength(cslice, CV_WHOLE_SEQ, 1);
-        double len1 = cvArcLength(contour, slice, 1);
-        if( len0 != len1 )
-        {
-            ts->printf(cvtest::TS::LOG,
-                       "The contour arc length is computed differently (%g vs %g)\n", len0, len1 );
-            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-            return;
-        }
-    }
-    ts->set_failed_test_info(cvtest::TS::OK);
+    // 3 points are the same.
+    pts[0] = pts[1];
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 10, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(0, radius, EPS);
 }
 
-
-TEST(Imgproc_ConvexHull, accuracy) { CV_ConvHullTest test; test.safe_run(); }
-TEST(Imgproc_MinAreaRect, accuracy) { CV_MinAreaRectTest test; test.safe_run(); }
-TEST(Imgproc_MinTriangle, accuracy) { CV_MinTriangleTest test; test.safe_run(); }
-TEST(Imgproc_MinCircle, accuracy) { CV_MinCircleTest test; test.safe_run(); }
-TEST(Imgproc_MinCircle2, accuracy) { CV_MinCircleTest2 test; test.safe_run(); }
-TEST(Imgproc_ContourPerimeter, accuracy) { CV_PerimeterTest test; test.safe_run(); }
-TEST(Imgproc_FitEllipse, accuracy) { CV_FitEllipseTest test; test.safe_run(); }
-TEST(Imgproc_FitEllipse, parallel) { CV_FitEllipseParallelTest test; test.safe_run(); }
-TEST(Imgproc_FitLine, accuracy) { CV_FitLineTest test; test.safe_run(); }
-TEST(Imgproc_ContourMoments, accuracy) { CV_ContourMomentsTest test; test.safe_run(); }
-TEST(Imgproc_ContourPerimeterSlice, accuracy) { CV_PerimeterAreaSliceTest test; test.safe_run(); }
-TEST(Imgproc_FitEllipse, small) { CV_FitEllipseSmallTest test; test.safe_run(); }
-
-
+TEST(Imgproc_minEnclosingCircle, regression_16051) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(85, 1415));
+    pts.push_back(Point2f(87, 1415));
+    pts.push_back(Point2f(89, 1414));
+    pts.push_back(Point2f(89, 1414));
+    pts.push_back(Point2f(87, 1412));
+    Point2f center;
+    float radius;
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 86.9f, 1e-3);
+    EXPECT_NEAR(center.y, 1414.1f, 1e-3);
+    EXPECT_NEAR(2.1024551f, radius, 1e-3);
+}
 
 PARAM_TEST_CASE(ConvexityDefects_regression_5908, bool, int)
 {
@@ -2484,5 +551,523 @@ TEST(Imgproc_minEnclosingTriangle, regression_mat_with_diff_channels)
     EXPECT_NO_THROW(minEnclosingTriangle(pointsNx1, triangle));
 }
 
+//==============================================================================
+
+typedef testing::TestWithParam<tuple<int, int>> fitLine_Modes;
+
+TEST_P(fitLine_Modes, accuracy)
+{
+    const int data_type = get<0>(GetParam());
+    const int dist_type = get<1>(GetParam());
+    const int CN = CV_MAT_CN(data_type);
+    const int res_type = CV_32FC(CN);
+
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        Mat v0(1, 1, data_type), v1(1, 1, data_type); // pt = v0 + v1 * t
+        Mat v1n;
+
+        RNG& rng = TS::ptr()->get_rng();
+        cvtest::randUni(rng, v0, Scalar::all(1), Scalar::all(100));
+        cvtest::randUni(rng, v1, Scalar::all(1), Scalar::all(100));
+        normalize(v1, v1n, 1, 0, NORM_L2, res_type);
+        v0.convertTo(v0, res_type);
+        v1.convertTo(v1, res_type);
+
+        const int NUM = rng.uniform(30, 100);
+        Mat points(NUM, 1, data_type, Scalar::all(0));
+        for (int i = 0; i < NUM; ++i)
+        {
+            Mat pt = v0 + v1 * i;
+            if (CV_MAT_DEPTH(data_type) == CV_32F)
+            {
+                Mat noise = cvtest::randomMat(rng, Size(1, 1), res_type, -0.01, 0.01, false);
+                pt += noise;
+
+            }
+            pt.copyTo(points.row(i));
+        }
+
+        Mat line_;
+        cv::fitLine(points, line_, dist_type, 0, 0.1, 0.01);
+        Mat line = line_.reshape(points.channels(), 1);
+
+        // check result type and size
+        EXPECT_EQ(res_type, line.type());
+        EXPECT_EQ(Size(2, 1), line.size());
+
+        // check result pt1
+        const double angle = line.col(0).dot(v1n);
+        EXPECT_NEAR(abs(angle), 1, 1e-2);
+
+        // put result pt0 to the original equation (pt = v0 + v1 * t) and find "t"
+        Mat diff = line.col(1) - v0;
+        cv::divide(diff, v1, diff);
+        cv::divide(diff, diff.at<float>(0, 0), diff);
+        const Mat unit(1, 1, res_type, Scalar::all(1));
+        EXPECT_NEAR(cvtest::norm(diff, unit, NORM_L1), 0, 0.01);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    fitLine_Modes,
+    testing::Combine(
+        testing::Values(CV_32FC2, CV_32FC3, CV_32SC2, CV_32SC3),
+        testing::Values(DIST_L1, DIST_L2, DIST_L12, DIST_FAIR, DIST_WELSCH, DIST_HUBER)));
+
+//==============================================================================
+
+inline float normAngle(float angle_deg)
+{
+    while (angle_deg < 0.f)
+        angle_deg += 180.f;
+    while (angle_deg > 180.f)
+        angle_deg -= 180.f;
+    if (abs(angle_deg - 180.f) < 0.01) // border case
+        angle_deg = 0.f;
+    return angle_deg;
+}
+
+inline float angleToDeg(float angle_rad)
+{
+    return angle_rad * 180.f / (float)M_PI;
+}
+
+inline float angleDiff(float a, float b)
+{
+    float res = a - b;
+    return normAngle(res);
+}
+
+typedef testing::TestWithParam<int> fitEllipse_Modes;
+
+TEST_P(fitEllipse_Modes, accuracy)
+{
+    const int data_type = GetParam();
+    const float int_scale = 1000.f;
+    const Size sz(1, 2);
+    const Matx22f rot {0.f, -1.f, 1.f, 0.f};
+    RNG& rng = TS::ptr()->get_rng();
+
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        Mat f0(sz, CV_32FC1), f1(sz, CV_32FC1), f2(sz, CV_32FC1);
+        cvtest::randUni(rng, f0, Scalar::all(-100), Scalar::all(100));
+        cvtest::randUni(rng, f1, Scalar::all(-100), Scalar::all(100));
+        if (ITER % 4 == 0)
+        {
+            // 0/90 degrees case
+            f1.at<float>(0, 0) = 0.;
+        }
+        // f2 is orthogonal to f1 and scaled
+        f2 = rot * f1 * cvtest::randomDouble(0.01, 3);
+
+        const Point2f ref_center(f0.at<float>(0), f0.at<float>(1));
+        const Size2f ref_size(
+            (float)cvtest::norm(f1, NORM_L2) * 2.f,
+            (float)cvtest::norm(f2, NORM_L2) * 2.f);
+        const float ref_angle1 = angleToDeg(atan(f1.at<float>(1) / f1.at<float>(0)));
+        const float ref_angle2 = angleToDeg(atan(f2.at<float>(1) / f2.at<float>(0)));
+
+        const int NUM = rng.uniform(10, 30);
+        Mat points(NUM, 1, data_type, Scalar::all(0));
+        for (int i = 0; i < NUM; ++i)
+        {
+            Mat pt = f0 + f1 * sin(i) + f2 * cos(i);
+            pt = pt.reshape(2);
+            if (data_type == CV_32SC2)
+            {
+                pt.convertTo(points.row(i), CV_32SC2, int_scale);
+            }
+            else if (data_type == CV_32FC2)
+            {
+                pt.copyTo(points.row(i));
+            }
+            else
+            {
+                FAIL() << "unsupported data type: " << data_type;
+            }
+        }
+
+        RotatedRect res = cv::fitEllipse(points);
+
+        if (data_type == CV_32SC2)
+        {
+            res.center /= int_scale;
+            res.size = Size2f(res.size.width / int_scale, res.size.height / int_scale);
+        }
+        const bool sizeSwap = (res.size.width < res.size.height) != (ref_size.width < ref_size.height);
+        if (sizeSwap)
+        {
+            std::swap(res.size.width, res.size.height);
+        }
+        EXPECT_FALSE(res.size.empty());
+        EXPECT_POINT2_NEAR(res.center, ref_center, 0.01);
+        const float sizeDiff = (data_type == CV_32FC2) ? 0.1f : 1.f;
+        EXPECT_NEAR(min(res.size.width, res.size.height), min(ref_size.width, ref_size.height), sizeDiff);
+        EXPECT_NEAR(max(res.size.width, res.size.height), max(ref_size.width, ref_size.height), sizeDiff);
+        if (sizeSwap)
+        {
+            EXPECT_LE(angleDiff(ref_angle2, res.angle), 0.1);
+        }
+        else
+        {
+            EXPECT_LE(angleDiff(ref_angle1, res.angle), 0.1);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    fitEllipse_Modes,
+        testing::Values(CV_32FC2, CV_32SC2));
+
+//==============================================================================
+
+TEST(fitEllipse, small)
+{
+    Size sz(50, 50);
+    vector<vector<Point> > c;
+    c.push_back(vector<Point>());
+    int scale = 1;
+    Point ofs = Point(0,0);//sz.width/2, sz.height/2) - Point(4,4)*scale;
+    c[0].push_back(Point(2, 0)*scale+ofs);
+    c[0].push_back(Point(0, 2)*scale+ofs);
+    c[0].push_back(Point(0, 6)*scale+ofs);
+    c[0].push_back(Point(2, 8)*scale+ofs);
+    c[0].push_back(Point(6, 8)*scale+ofs);
+    c[0].push_back(Point(8, 6)*scale+ofs);
+    c[0].push_back(Point(8, 2)*scale+ofs);
+    c[0].push_back(Point(6, 0)*scale+ofs);
+
+    RotatedRect e = cv::fitEllipse(c[0]);
+
+    EXPECT_NEAR(e.center.x, 4, 1.f);
+    EXPECT_NEAR(e.center.y, 4, 1.f);
+    EXPECT_NEAR(e.size.width, 9, 1.);
+    EXPECT_NEAR(e.size.height, 9, 1.f);
+}
+
+//==============================================================================
+
+// points stored in rows
+inline static int findPointInMat(const Mat & data, const Mat & point)
+{
+    for (int i = 0; i < data.rows; ++i)
+        if (cvtest::norm(data.row(i), point, NORM_L1) == 0)
+            return i;
+    return -1;
+}
+
+// > 0 - "pt" is to the right of AB
+// < 0 - "pt" is to the left of AB
+// points stored in rows
+inline static double getSide(const Mat & ptA, const Mat & ptB, const Mat & pt)
+{
+    Mat d0 = pt - ptA, d1 = ptB - pt, prod;
+    vconcat(d0, d1, prod);
+    prod = prod.reshape(1);
+    if (prod.depth() == CV_32S)
+        prod.convertTo(prod, CV_32F);
+    return determinant(prod);
+}
+
+typedef testing::TestWithParam<perf::MatDepth> convexHull_Modes;
+
+TEST_P(convexHull_Modes, accuracy)
+{
+    const int data_type = CV_MAKE_TYPE(GetParam(), 2);
+    RNG & rng = TS::ptr()->get_rng();
+
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        const int NUM = cvtest::randomInt(5, 100);
+        Mat points(NUM, 1, data_type, Scalar::all(0));
+        cvtest::randUni(rng, points, Scalar(-10), Scalar::all(10));
+
+        Mat hull, c_hull, indexes;
+        cv::convexHull(points, hull, false, true); // default parameters
+        cv::convexHull(points, c_hull, true, true); // counter-clockwise
+        cv::convexHull(points, indexes, false, false); // point indexes
+
+        ASSERT_EQ(hull.size().width, 1);
+        ASSERT_GE(hull.size().height, 3);
+        ASSERT_EQ(hull.size(), c_hull.size());
+        ASSERT_EQ(hull.size(), indexes.size());
+
+        // find shift between hull and counter-clockwise hull
+        const int c_diff = findPointInMat(hull, c_hull.row(0));
+        ASSERT_NE(c_diff, -1);
+
+        const int sz = (int)hull.total();
+        for (int i = 0; i < sz; ++i)
+        {
+            SCOPED_TRACE(cv::format("vertex %d", i));
+
+            Mat prev = (i == 0) ? hull.row(sz - 1) : hull.row(i - 1);
+            Mat cur = hull.row(i);
+            Mat next = (i != sz - 1) ? hull.row(i + 1) : hull.row(0);
+            // 1. "cur' is one of points
+            EXPECT_NE(findPointInMat(points, cur), -1);
+            // 2. convexity: "cur" is on right side of "prev - next" edge
+            EXPECT_GE(getSide(prev, next, cur), 0);
+            // 3. all points are inside polygon - on the left side of "cur - next" edge
+            for (int j = 0; j < points.rows; ++j)
+            {
+                SCOPED_TRACE(cv::format("point %d", j));
+                EXPECT_LE(getSide(cur, next, points.row(j)), 0);
+            }
+            // check counter-clockwise hull
+            const int c_idx = (sz - i + c_diff) % sz;
+            Mat c_cur = c_hull.row(c_idx);
+            EXPECT_MAT_NEAR(cur, c_cur, 0);
+            // check indexed hull
+            const int pt_index = indexes.at<int>(i);
+            EXPECT_MAT_NEAR(cur, points.row(pt_index), 0);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    convexHull_Modes,
+        testing::Values(CV_32F, CV_32S));
+
+
+//==============================================================================
+
+typedef testing::TestWithParam<perf::MatDepth> minAreaRect_Modes;
+
+TEST_P(minAreaRect_Modes, accuracy)
+{
+    const int data_type = CV_MAKE_TYPE(GetParam(), 2);
+    RNG & rng = TS::ptr()->get_rng();
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        const int NUM = cvtest::randomInt(5, 100);
+        Mat points(NUM, 1, data_type, Scalar::all(0));
+        cvtest::randUni(rng, points, Scalar(-10), Scalar::all(10));
+
+        const RotatedRect res = cv::minAreaRect(points);
+        Point2f box_pts[4] {};
+        res.points(box_pts);
+
+        // check that the box contains all the points - all on one side
+        double common_side = 0.;
+        bool edgeHasPoint[4] {0};
+        for (int i = 0; i < 4; ++i)
+        {
+            const int j = (i == 3) ? 0 : i + 1;
+            Mat cur(1, 1, CV_32FC2, box_pts + i);
+            Mat next(1, 1, CV_32FC2, box_pts + j);
+            for (int k = 0; k < points.rows; ++k)
+            {
+                SCOPED_TRACE(cv::format("point %d", j));
+                Mat one_point;
+                points.row(k).convertTo(one_point, CV_32FC2);
+                const double side = getSide(cur, next, one_point);
+                if (abs(side) < 0.01) // point on edge - no need to check
+                {
+                    edgeHasPoint[i] = true;
+                    continue;
+                }
+                if (common_side == 0.) // initial state
+                {
+                    common_side = side > 0 ? 1. : -1.; // only sign matters
+                }
+                else
+                {
+                    EXPECT_EQ(common_side > 0, side > 0) << common_side << ", " << side;
+                }
+            }
+        }
+        EXPECT_TRUE(edgeHasPoint[0] && edgeHasPoint[1] && edgeHasPoint[2] && edgeHasPoint[3]);
+    }
+
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    minAreaRect_Modes,
+        testing::Values(CV_32F, CV_32S));
+
+
+//==============================================================================
+
+// true if "point" is on one of hull's edges
+inline static bool isPointOnHull(const Mat &hull, const Mat &point, const double thresh = 0.01)
+{
+    const int sz = hull.rows;
+    for (int k = 0; k < sz; ++k)
+    {
+        const double side = getSide(hull.row(k), hull.row(k == sz - 1 ? 0 : k + 1), point);
+        if (abs(side) < thresh)
+            return true;
+    }
+    return false;
+}
+
+// true if one of hull's edges touches "A-B"
+inline static bool isEdgeOnHull(const Mat &hull, const Mat &ptA, const Mat &ptB, const double thresh = 0.01)
+{
+    const int sz = hull.rows;
+    double prev_side = getSide(ptA, ptB, hull.row(sz - 1));
+    for (int k = 0; k < sz; ++k)
+    {
+        Mat cur = hull.row(k);
+        const double cur_side = getSide(ptA, ptB, cur);
+        if (abs(prev_side) < thresh && abs(cur_side) < thresh)
+            return true;
+        prev_side = cur_side;
+    }
+    return false;
+}
+
+typedef testing::TestWithParam<perf::MatDepth> minEnclosingTriangle_Modes;
+
+TEST_P(minEnclosingTriangle_Modes, accuracy)
+{
+    const int data_type = CV_MAKETYPE(GetParam(), 2);
+    RNG & rng = TS::ptr()->get_rng();
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        const int NUM = cvtest::randomInt(5, 100);
+        Mat points(NUM, 1, data_type, Scalar::all(0));
+        cvtest::randUni(rng, points, Scalar::all(-100), Scalar::all(100));
+
+        Mat triangle;
+        const double area = cv::minEnclosingTriangle(points, triangle);
+
+        ASSERT_GT(area, 0.0001);
+        ASSERT_EQ(triangle.type(), CV_32FC2);
+        triangle = triangle.reshape(2, 1);
+        ASSERT_EQ(triangle.size(), Size(3, 1));
+
+        Mat hull;
+        cv::convexHull(points, hull);
+        hull.convertTo(hull, CV_32FC2);
+
+        // check that all points are enclosed by triangle sides
+        double commonSide = 0.;
+        bool hasEdgeOnHull = false;
+        for (int i = 0; i < 3; ++i)
+        {
+            SCOPED_TRACE(cv::format("edge %d", i));
+            const int j = (i == 2) ? 0 : i + 1;
+            Mat cur = triangle.col(i);
+            Mat next = triangle.col(j);
+            for (int k = 0; k < points.rows; ++k)
+            {
+                SCOPED_TRACE(cv::format("point %d", k));
+                Mat pt;
+                points.row(k).convertTo(pt, CV_32FC2);
+                const double side = getSide(cur, next, pt);
+                if (abs(side) < 0.01) // point on edge - no need to check
+                    continue;
+                if (commonSide == 0.f) // initial state
+                {
+                    commonSide = side > 0 ? 1.f : -1.f; // only sign matters
+                }
+                else
+                {
+                    // either on the same side or close to zero
+                    EXPECT_EQ(commonSide > 0, side > 0) << commonSide << ", side=" << side;
+                }
+            }
+
+            // triangle mid-points must be on the hull edges
+            const Mat midPoint = (cur + next) / 2;
+            EXPECT_TRUE(isPointOnHull(hull, midPoint));
+
+            // at least one of hull edges must be on tirangle edge
+            hasEdgeOnHull = hasEdgeOnHull || isEdgeOnHull(hull, cur, next);
+        }
+        EXPECT_TRUE(hasEdgeOnHull);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    minEnclosingTriangle_Modes,
+        testing::Values(CV_32F, CV_32S));
+
+//==============================================================================
+
+typedef testing::TestWithParam<perf::MatDepth> minEnclosingCircle_Modes;
+
+TEST_P(minEnclosingCircle_Modes, accuracy)
+{
+    const int data_type = CV_MAKETYPE(GetParam(), 2);
+    RNG & rng = TS::ptr()->get_rng();
+    for (int ITER = 0; ITER < 20; ++ITER)
+    {
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        const int NUM = cvtest::randomInt(5, 100);
+        Mat points(NUM, 1, data_type, Scalar::all(0)), fpoints;
+        cvtest::randUni(rng, points, Scalar::all(-100), Scalar::all(100));
+        points.convertTo(fpoints, CV_32FC2);
+
+        Point2f center {};
+        float radius = 0.f;
+        cv::minEnclosingCircle(points, center, radius);
+
+        vector<int> boundPts; // indexes
+        for (int i = 0; i < NUM; ++i)
+        {
+            Point2f pt = fpoints.at<Point2f>(i);
+            const double dist = cv::norm(pt - center);
+            EXPECT_LE(dist, radius);
+            if (abs(dist - radius) < 0.01)
+                boundPts.push_back(i);
+        }
+        // 2 points on diameter or at least 3 points on circle
+        EXPECT_GE(boundPts.size(), 2llu);
+
+        // 2 points on diameter
+        if (boundPts.size() == 2llu)
+        {
+            const Point2f diff = fpoints.at<Point2f>(boundPts[0]) - fpoints.at<Point2f>(boundPts[1]);
+            EXPECT_NEAR(cv::norm(diff), 2 * radius, 0.001);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/,
+    minEnclosingCircle_Modes,
+        testing::Values(CV_32F, CV_32S));
+
+//==============================================================================
+
+TEST(minEnclosingCircle, three_points)
+{
+    RNG & rng = TS::ptr()->get_rng();
+    Point2f center = Point2f(rng.uniform(0.0f, 1000.0f), rng.uniform(0.0f, 1000.0f));;
+    float radius = rng.uniform(0.0f, 500.0f);
+    float angle = (float)rng.uniform(0.0f, (float)(CV_2PI));
+    vector<Point2f> pts;
+    pts.push_back(center + Point2f(radius * cos(angle), radius * sin(angle)));
+    angle += (float)CV_PI;
+    pts.push_back(center + Point2f(radius * cos(angle), radius * sin(angle)));
+    float radius2 = radius * radius;
+    float x = rng.uniform(center.x - radius, center.x + radius);
+    float deltaX = x - center.x;
+    float upperBoundY = sqrt(radius2 - deltaX * deltaX);
+    float y = rng.uniform(center.y - upperBoundY, center.y + upperBoundY);
+    pts.push_back(Point2f(x, y));
+    // Find the minimum area enclosing circle
+    Point2f calcCenter;
+    float calcRadius;
+    cv::minEnclosingCircle(pts, calcCenter, calcRadius);
+    const float delta = (float)cv::norm(calcCenter - center) + abs(calcRadius - radius);
+    EXPECT_LE(delta, 1.f);
+}
+
 }} // namespace
 /* End of file. */
diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp
index a6e45709f3..30423c4b45 100644
--- a/modules/imgproc/test/test_filter.cpp
+++ b/modules/imgproc/test/test_filter.cpp
@@ -470,7 +470,7 @@ void CV_DerivBaseTest::get_test_array_types_and_sizes( int test_case_idx,
     int sameDepth = cvtest::randInt(rng) % 2;
     types[OUTPUT][0] = types[REF_OUTPUT][0] = sameDepth ? depth : CV_MAKETYPE(depth==CV_8U?CV_16S:CV_32F,1);
     _aperture_size = (cvtest::randInt(rng)%5)*2 - 1;
-    sizes[INPUT][1] = aperture_size = cvSize(_aperture_size, _aperture_size);
+    sizes[INPUT][1] = aperture_size = Size(_aperture_size, _aperture_size);
 }
 
 
@@ -519,21 +519,21 @@ void CV_SobelTest::get_test_array_types_and_sizes( int test_case_idx,
     }
 
     if( _aperture_size < 0 )
-        aperture_size = cvSize(3, 3);
+        aperture_size = Size(3, 3);
     else if( _aperture_size == 1 )
     {
         if( dx == 0 )
-            aperture_size = cvSize(1, 3);
+            aperture_size = Size(1, 3);
         else if( dy == 0 )
-            aperture_size = cvSize(3, 1);
+            aperture_size = Size(3, 1);
         else
         {
             _aperture_size = 3;
-            aperture_size = cvSize(3, 3);
+            aperture_size = Size(3, 3);
         }
     }
     else
-        aperture_size = cvSize(_aperture_size, _aperture_size);
+        aperture_size = Size(_aperture_size, _aperture_size);
 
     sizes[INPUT][1] = aperture_size;
     anchor.x = aperture_size.width / 2;
@@ -647,10 +647,10 @@ void CV_LaplaceTest::get_test_array_types_and_sizes( int test_case_idx,
     {
         if( _aperture_size < 0 )
             _aperture_size = 1;
-        aperture_size = cvSize(3, 3);
+        aperture_size = Size(3, 3);
     }
     else
-        aperture_size = cvSize(_aperture_size, _aperture_size);
+        aperture_size = Size(_aperture_size, _aperture_size);
 
     sizes[INPUT][1] = aperture_size;
     anchor.x = aperture_size.width / 2;
@@ -1575,7 +1575,7 @@ CV_PreCornerDetectTest::CV_PreCornerDetectTest() : CV_FeatureSelBaseTest( 1 )
 
 void CV_PreCornerDetectTest::run_func()
 {
-    cvPreCornerDetect( test_array[INPUT][0], test_array[OUTPUT][0], aperture_size );
+    cv::preCornerDetect( test_mat[INPUT][0], test_mat[OUTPUT][0], aperture_size, BORDER_REPLICATE );
 }
 
 
diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index e8840d231b..b468ebf757 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -39,6 +39,8 @@
 //
 //M*/
 
+#include "opencv2/ts/ocl_test.hpp"
+#include "opencv2/ts/ts_gtest.h"
 #include "test_precomp.hpp"
 
 namespace opencv_test { namespace {
@@ -768,8 +770,8 @@ void CV_RemapTest::fill_array( int test_case_idx, int i, int j, Mat& arr )
 
 void CV_RemapTest::run_func()
 {
-    cvRemap( test_array[INPUT][0], test_array[INPUT_OUTPUT][0],
-             test_array[INPUT][1], test_array[INPUT][2], interpolation );
+    cv::remap(test_mat[INPUT][0], test_mat[INPUT_OUTPUT][0],
+              test_mat[INPUT][1], test_mat[INPUT][2], interpolation );
 }
 
 
@@ -873,7 +875,7 @@ protected:
     double get_success_error_level( int test_case_idx, int i, int j );
     void fill_array( int test_case_idx, int i, int j, Mat& arr );
 
-    CvPoint2D32f center;
+    Point2f center;
     bool test_cpp;
 };
 
@@ -925,13 +927,8 @@ void CV_GetRectSubPixTest::fill_array( int test_case_idx, int i, int j, Mat& arr
 
 void CV_GetRectSubPixTest::run_func()
 {
-    if(!test_cpp)
-        cvGetRectSubPix( test_array[INPUT][0], test_array[INPUT_OUTPUT][0], center );
-    else
-    {
-        cv::Mat _out = cv::cvarrToMat(test_array[INPUT_OUTPUT][0]);
-        cv::getRectSubPix( cv::cvarrToMat(test_array[INPUT][0]), _out.size(), center, _out, _out.type());
-    }
+    cv::Mat _out = test_mat[INPUT_OUTPUT][0];
+    cv::getRectSubPix(test_mat[INPUT][0], _out.size(), center, _out, _out.type());
 }
 
 
diff --git a/modules/imgproc/test/test_pc.cpp b/modules/imgproc/test/test_pc.cpp
index 969f5bcfa1..c4152a84b7 100644
--- a/modules/imgproc/test/test_pc.cpp
+++ b/modules/imgproc/test/test_pc.cpp
@@ -42,6 +42,8 @@
 
 #include "test_precomp.hpp"
 
+#define CV_DXT_MUL_CONJ 8
+
 namespace opencv_test { namespace {
 
 /// phase correlation
@@ -181,7 +183,7 @@ void CV_DivSpectrumsTest::get_test_array_types_and_sizes( int test_case_idx, vec
 
     // Get the flag of the input.
     const int rand_int_flags = cvtest::randInt(rng);
-    flags = rand_int_flags & (CV_DXT_MUL_CONJ | CV_DXT_ROWS);
+    flags = rand_int_flags & (CV_DXT_MUL_CONJ | DFT_ROWS);
 
     // Get input type.
     const int rand_int_type = cvtest::randInt(rng);
diff --git a/modules/imgproc/test/test_templmatch.cpp b/modules/imgproc/test/test_templmatch.cpp
index 760052aeb4..880e0d71e7 100644
--- a/modules/imgproc/test/test_templmatch.cpp
+++ b/modules/imgproc/test/test_templmatch.cpp
@@ -43,167 +43,164 @@
 
 namespace opencv_test { namespace {
 
-class CV_TemplMatchTest : public cvtest::ArrayTest
-{
-public:
-    CV_TemplMatchTest();
-
-protected:
-    int read_params( const cv::FileStorage& fs );
-    void get_test_array_types_and_sizes( int test_case_idx, vector<vector<Size> >& sizes, vector<vector<int> >& types );
-    void get_minmax_bounds( int i, int j, int type, Scalar& low, Scalar& high );
-    double get_success_error_level( int test_case_idx, int i, int j );
-    void run_func();
-    void prepare_to_validation( int );
-
-    int max_template_size;
-    int method;
-    bool test_cpp;
-};
-
-
-CV_TemplMatchTest::CV_TemplMatchTest()
-{
-    test_array[INPUT].push_back(NULL);
-    test_array[INPUT].push_back(NULL);
-    test_array[OUTPUT].push_back(NULL);
-    test_array[REF_OUTPUT].push_back(NULL);
-    element_wise_relative_error = false;
-    max_template_size = 100;
-    method = 0;
-    test_cpp = false;
-}
-
-
-int CV_TemplMatchTest::read_params( const cv::FileStorage& fs )
-{
-    int code = cvtest::ArrayTest::read_params( fs );
-    if( code < 0 )
-        return code;
-
-    read( find_param( fs, "max_template_size" ), max_template_size, max_template_size );
-    max_template_size = cvtest::clipInt( max_template_size, 1, 100 );
-
-    return code;
-}
-
-
-void CV_TemplMatchTest::get_minmax_bounds( int i, int j, int type, Scalar& low, Scalar& high )
-{
-    cvtest::ArrayTest::get_minmax_bounds( i, j, type, low, high );
-    int depth = CV_MAT_DEPTH(type);
-    if( depth == CV_32F )
-    {
-        low = Scalar::all(-10.);
-        high = Scalar::all(10.);
-    }
-}
-
-
-void CV_TemplMatchTest::get_test_array_types_and_sizes( int test_case_idx,
-                                                vector<vector<Size> >& sizes, vector<vector<int> >& types )
-{
-    RNG& rng = ts->get_rng();
-    int depth = cvtest::randInt(rng) % 2, cn = cvtest::randInt(rng) & 1 ? 3 : 1;
-    cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
-    depth = depth == 0 ? CV_8U : CV_32F;
-
-    types[INPUT][0] = types[INPUT][1] = CV_MAKETYPE(depth,cn);
-    types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_32FC1;
-
-    sizes[INPUT][1].width = cvtest::randInt(rng)%MIN(sizes[INPUT][1].width,max_template_size) + 1;
-    sizes[INPUT][1].height = cvtest::randInt(rng)%MIN(sizes[INPUT][1].height,max_template_size) + 1;
-    sizes[OUTPUT][0].width = sizes[INPUT][0].width - sizes[INPUT][1].width + 1;
-    sizes[OUTPUT][0].height = sizes[INPUT][0].height - sizes[INPUT][1].height + 1;
-    sizes[REF_OUTPUT][0] = sizes[OUTPUT][0];
-
-    method = cvtest::randInt(rng)%6;
-    test_cpp = (cvtest::randInt(rng) & 256) == 0;
-}
-
-
-double CV_TemplMatchTest::get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ )
-{
-    if( test_mat[INPUT][1].depth() == CV_8U ||
-        (method >= cv::TM_CCOEFF && test_mat[INPUT][1].cols*test_mat[INPUT][1].rows <= 2) )
-        return 1e-2;
-    else
-        return 1e-3;
-}
-
-
-void CV_TemplMatchTest::run_func()
-{
-    if(!test_cpp)
-        cvMatchTemplate( test_array[INPUT][0], test_array[INPUT][1], test_array[OUTPUT][0], method );
-    else
-    {
-        cv::Mat _out = cv::cvarrToMat(test_array[OUTPUT][0]);
-        cv::matchTemplate(cv::cvarrToMat(test_array[INPUT][0]), cv::cvarrToMat(test_array[INPUT][1]), _out, method);
-    }
+TEST(Imgproc_MatchTemplate, bug_9597) {
+        const uint8_t img[] = {
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 247, 247, 247, 247, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 247, 247, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245};
+        const uint8_t tmpl[] = {
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
+                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245 };
+        cv::Mat cvimg(cv::Size(61, 82), CV_8UC1, (void*)img, cv::Mat::AUTO_STEP);
+        cv::Mat cvtmpl(cv::Size(17, 17), CV_8UC1, (void*)tmpl, cv::Mat::AUTO_STEP);
+        cv::Mat result;
+        cv::matchTemplate(cvimg, cvtmpl, result, cv::TM_SQDIFF);
+        double minValue;
+        cv::minMaxLoc(result, &minValue, NULL, NULL, NULL);
+        ASSERT_GE(minValue, 0);
 }
 
+//==============================================================================
 
-static void cvTsMatchTemplate( const CvMat* img, const CvMat* templ, CvMat* result, int method )
+static void matchTemplate_reference(Mat & img, Mat & templ, Mat & result, const int method)
 {
-    int i, j, k, l;
-    int depth = CV_MAT_DEPTH(img->type), cn = CV_MAT_CN(img->type);
-    int width_n = templ->cols*cn, height = templ->rows;
-    int a_step = img->step / CV_ELEM_SIZE(img->type & CV_MAT_DEPTH_MASK);
-    int b_step = templ->step / CV_ELEM_SIZE(templ->type & CV_MAT_DEPTH_MASK);
-    CvScalar b_mean = CV_STRUCT_INITIALIZER, b_sdv = CV_STRUCT_INITIALIZER;
-    double b_denom = 1., b_sum2 = 0;
-    int area = templ->rows*templ->cols;
-
-    cvAvgSdv(templ, &b_mean, &b_sdv);
-
-    for( i = 0; i < cn; i++ )
-        b_sum2 += (b_sdv.val[i]*b_sdv.val[i] + b_mean.val[i]*b_mean.val[i])*area;
-
-    if( b_sdv.val[0]*b_sdv.val[0] + b_sdv.val[1]*b_sdv.val[1] +
-        b_sdv.val[2]*b_sdv.val[2] + b_sdv.val[3]*b_sdv.val[3] < DBL_EPSILON &&
-        method == cv::TM_CCOEFF_NORMED )
+    CV_Assert(cv::TM_SQDIFF <= method && method <= cv::TM_CCOEFF_NORMED);
+
+    const Size res_sz(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
+    result.create(res_sz, CV_32FC1);
+
+    const int depth = img.depth();
+    const int cn = img.channels();
+    const int area = templ.size().area();
+    const int width_n = templ.cols * cn;
+    const int height = templ.rows;
+    int a_step = (int)(img.step / img.elemSize1());
+    int b_step = (int)(templ.step / templ.elemSize1());
+
+    Scalar b_mean = Scalar::all(0);
+    Scalar b_sdv = Scalar::all(0);
+    cv::meanStdDev(templ, b_mean, b_sdv);
+
+    double b_sum2 = 0.;
+    for (int i = 0; i < cn; i++ )
+        b_sum2 += (b_sdv.val[i] * b_sdv.val[i] + b_mean.val[i] * b_mean.val[i]) * area;
+
+    if (b_sdv.val[0] * b_sdv.val[0] + b_sdv.val[1] * b_sdv.val[1] +
+        b_sdv.val[2] * b_sdv.val[2] + b_sdv.val[3] * b_sdv.val[3] < DBL_EPSILON &&
+        method == cv::TM_CCOEFF_NORMED)
     {
-        cvSet( result, cvScalarAll(1.) );
+        result = Scalar::all(1.);
         return;
     }
 
-    if( method & 1 )
+    double b_denom = 1.;
+    if (method & 1) // _NORMED
     {
         b_denom = 0;
-        if( method != cv::TM_CCOEFF_NORMED )
+        if (method != cv::TM_CCOEFF_NORMED)
         {
             b_denom = b_sum2;
         }
         else
         {
-            for( i = 0; i < cn; i++ )
-                b_denom += b_sdv.val[i]*b_sdv.val[i]*area;
+            for (int i = 0; i < cn; i++)
+                b_denom += b_sdv.val[i] * b_sdv.val[i] * area;
         }
         b_denom = sqrt(b_denom);
-        if( b_denom == 0 )
+        if (b_denom == 0)
             b_denom = 1.;
     }
 
-    CV_Assert( cv::TM_SQDIFF <= method && method <= cv::TM_CCOEFF_NORMED );
-
-    for( i = 0; i < result->rows; i++ )
+    for (int i = 0; i < result.rows; i++)
     {
-        for( j = 0; j < result->cols; j++ )
+        for (int j = 0; j < result.cols; j++)
         {
             Scalar a_sum(0), a_sum2(0);
             Scalar ccorr(0);
             double value = 0.;
 
-            if( depth == CV_8U )
+            if (depth == CV_8U)
             {
-                const uchar* a = img->data.ptr + i*img->step + j*cn;
-                const uchar* b = templ->data.ptr;
+                const uchar* a = img.ptr<uchar>(i, j); // ??? ->data.ptr + i*img->step + j*cn;
+                const uchar* b = templ.ptr<uchar>();
 
                 if( cn == 1 || method < cv::TM_CCOEFF )
                 {
-                    for( k = 0; k < height; k++, a += a_step, b += b_step )
-                        for( l = 0; l < width_n; l++ )
+                    for (int k = 0; k < height; k++, a += a_step, b += b_step)
+                        for (int l = 0; l < width_n; l++)
                         {
                             ccorr.val[0] += a[l]*b[l];
                             a_sum.val[0] += a[l];
@@ -212,8 +209,8 @@ static void cvTsMatchTemplate( const CvMat* img, const CvMat* templ, CvMat* resu
                 }
                 else
                 {
-                    for( k = 0; k < height; k++, a += a_step, b += b_step )
-                        for( l = 0; l < width_n; l += 3 )
+                    for (int k = 0; k < height; k++, a += a_step, b += b_step)
+                        for (int l = 0; l < width_n; l += 3)
                         {
                             ccorr.val[0] += a[l]*b[l];
                             ccorr.val[1] += a[l+1]*b[l+1];
@@ -227,15 +224,15 @@ static void cvTsMatchTemplate( const CvMat* img, const CvMat* templ, CvMat* resu
                         }
                 }
             }
-            else
+            else // CV_32F
             {
-                const float* a = (const float*)(img->data.ptr + i*img->step) + j*cn;
-                const float* b = (const float*)templ->data.ptr;
+                const float* a = img.ptr<float>(i, j); // ???? (const float*)(img->data.ptr + i*img->step) + j*cn;
+                const float* b = templ.ptr<float>();
 
                 if( cn == 1 || method < cv::TM_CCOEFF )
                 {
-                    for( k = 0; k < height; k++, a += a_step, b += b_step )
-                        for( l = 0; l < width_n; l++ )
+                    for (int k = 0; k < height; k++, a += a_step, b += b_step)
+                        for (int l = 0; l < width_n; l++)
                         {
                             ccorr.val[0] += a[l]*b[l];
                             a_sum.val[0] += a[l];
@@ -244,8 +241,8 @@ static void cvTsMatchTemplate( const CvMat* img, const CvMat* templ, CvMat* resu
                 }
                 else
                 {
-                    for( k = 0; k < height; k++, a += a_step, b += b_step )
-                        for( l = 0; l < width_n; l += 3 )
+                    for (int k = 0; k < height; k++, a += a_step, b += b_step)
+                        for (int l = 0; l < width_n; l += 3)
                         {
                             ccorr.val[0] += a[l]*b[l];
                             ccorr.val[1] += a[l+1]*b[l+1];
@@ -299,130 +296,50 @@ static void cvTsMatchTemplate( const CvMat* img, const CvMat* templ, CvMat* resu
                 else
                     value = method != cv::TM_SQDIFF_NORMED ? 0 : 1;
             }
-
-            ((float*)(result->data.ptr + result->step*i))[j] = (float)value;
+            result.at<float>(i, j) = (float)value;
         }
     }
 }
 
+//==============================================================================
 
-void CV_TemplMatchTest::prepare_to_validation( int /*test_case_idx*/ )
-{
-    CvMat _input = cvMat(test_mat[INPUT][0]), _templ = cvMat(test_mat[INPUT][1]);
-    CvMat _output = cvMat(test_mat[REF_OUTPUT][0]);
-    cvTsMatchTemplate( &_input, &_templ, &_output, method );
+CV_ENUM(MatchModes, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED);
 
-    //if( ts->get_current_test_info()->test_case_idx == 0 )
-    /*{
-        CvFileStorage* fs = cvOpenFileStorage( "_match_template.yml", 0, CV_STORAGE_WRITE );
-        cvWrite( fs, "image", &test_mat[INPUT][0] );
-        cvWrite( fs, "template", &test_mat[INPUT][1] );
-        cvWrite( fs, "ref", &test_mat[REF_OUTPUT][0] );
-        cvWrite( fs, "opencv", &test_mat[OUTPUT][0] );
-        cvWriteInt( fs, "method", method );
-        cvReleaseFileStorage( &fs );
-    }*/
+typedef testing::TestWithParam<testing::tuple<perf::MatDepth, int, MatchModes>> matchTemplate_Modes;
+
+TEST_P(matchTemplate_Modes, accuracy)
+{
+    const int data_type = CV_MAKE_TYPE(get<0>(GetParam()), get<1>(GetParam()));
+    const int method = get<2>(GetParam());
+    RNG & rng = TS::ptr()->get_rng();
 
-    if( method >= cv::TM_CCOEFF )
+    for (int ITER = 0; ITER < 20; ++ITER)
     {
-        // avoid numerical stability problems in singular cases (when the results are near to 0)
-        const double delta = 10.;
-        test_mat[REF_OUTPUT][0] += Scalar::all(delta);
-        test_mat[OUTPUT][0] += Scalar::all(delta);
+        SCOPED_TRACE(cv::format("iteration %d", ITER));
+
+        const Size imgSize(rng.uniform(128, 320), rng.uniform(128, 240));
+        const Size templSize(rng.uniform(1, 100), rng.uniform(1, 100));
+        Mat img(imgSize, data_type, Scalar::all(0));
+        Mat templ(templSize, data_type, Scalar::all(0));
+        cvtest::randUni(rng, img, Scalar::all(0), Scalar::all(255));
+        cvtest::randUni(rng, templ, Scalar::all(0), Scalar::all(255));
+
+        Mat result;
+        cv::matchTemplate(img, templ, result, method);
+
+        Mat reference;
+        matchTemplate_reference(img, templ, reference, method);
+
+        EXPECT_MAT_NEAR_RELATIVE(result, reference, 1e-3);
     }
 }
 
-TEST(Imgproc_MatchTemplate, accuracy) { CV_TemplMatchTest test; test.safe_run(); }
+INSTANTIATE_TEST_CASE_P(/**/,
+    matchTemplate_Modes,
+        testing::Combine(
+            testing::Values(CV_8U, CV_32F),
+            testing::Values(1, 3),
+            testing::Values(TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)));
 
-}
 
-TEST(Imgproc_MatchTemplate, bug_9597) {
-        const uint8_t img[] = {
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 246, 246, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 247, 247, 247, 247, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 247, 247, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245};
-        const uint8_t tmpl[] = {
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
-                245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245 };
-        cv::Mat cvimg(cv::Size(61, 82), CV_8UC1, (void*)img, cv::Mat::AUTO_STEP);
-        cv::Mat cvtmpl(cv::Size(17, 17), CV_8UC1, (void*)tmpl, cv::Mat::AUTO_STEP);
-        cv::Mat result;
-        cv::matchTemplate(cvimg, cvtmpl, result, cv::TM_SQDIFF);
-        double minValue;
-        cv::minMaxLoc(result, &minValue, NULL, NULL, NULL);
-        ASSERT_GE(minValue, 0);
-}
-} // namespace
+}} // namespace

From dfbd18e9aa12cd8018e387f7fe191c5ed69fda06 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Fri, 5 Jul 2024 07:53:28 +0200
Subject: [PATCH 18/39] Merge pull request #25864 from vrabaud:legacy

Make sure all the lines of a JPEG are read #25864

In case of corrupted JPEG, imread would still return a JPEG of the proper size (as indicated by the header) but with some uninitialized values. I do not have a short reproducer I can add as a test as this was found by our fuzzers.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
---
 modules/imgcodecs/src/grfmt_jpeg.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/imgcodecs/src/grfmt_jpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg.cpp
index 98019cc48a..a3a7f70c3c 100644
--- a/modules/imgcodecs/src/grfmt_jpeg.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg.cpp
@@ -499,7 +499,7 @@ bool  JpegDecoder::readData( Mat& img )
                 for( int iy = 0 ; iy < m_height; iy ++ )
                 {
                     uchar* data = img.ptr<uchar>(iy);
-                    jpeg_read_scanlines( cinfo, &data, 1 );
+                    if (jpeg_read_scanlines( cinfo, &data, 1 ) != 1) return false;
                 }
             }
             else
@@ -510,7 +510,7 @@ bool  JpegDecoder::readData( Mat& img )
                 for( int iy = 0 ; iy < m_height; iy ++ )
                 {
                     uchar* data = img.ptr<uchar>(iy);
-                    jpeg_read_scanlines( cinfo, buffer, 1 );
+                    if (jpeg_read_scanlines( cinfo, buffer, 1 ) != 1) return false;
 
                     if( color )
                     {

From d30b9450c1193c3111287c938c9bec87c3ed9cb0 Mon Sep 17 00:00:00 2001
From: Yuantao Feng <yuantao.feng@opencv.org.cn>
Date: Fri, 5 Jul 2024 20:46:01 +0800
Subject: [PATCH 19/39] Merge pull request #25872 from fengyuentau:core/v_erf

core: add v_erf #25872

This patch adds v_erf, which is needed by https://github.com/opencv/opencv/pull/25147.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 .../include/opencv2/core/hal/intrin_cpp.hpp   | 10 ++++-
 .../include/opencv2/core/hal/intrin_math.hpp  | 45 +++++++++++++++++++
 modules/core/test/test_intrin_utils.hpp       | 43 ++++++++++++++++++
 3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
index e9e8d28eaa..e364ba359b 100644
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@@ -263,7 +263,8 @@ Most of these operations return only one value.
 
 ### Other math
 
-- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp
+- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp,
+                            @ref v_erf
 - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
 
 ### Conversions
@@ -761,6 +762,13 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
 OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
 #define OPENCV_HAL_MATH_HAVE_LOG 1
 
+/**
+ * @brief Error function.
+ *
+ * @note Support FP32 precision for now.
+ */
+OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp)
+
 //! @cond IGNORED
 OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
 #define OPENCV_HAL_MATH_HAVE_SIN 1
diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp
index 0f51b9ba13..4f967cff1a 100644
--- a/modules/core/include/opencv2/core/hal/intrin_math.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp
@@ -418,5 +418,50 @@ namespace CV__SIMD_NAMESPACE {
 #define OPENCV_HAL_MATH_HAVE_LOG 1
 //! @}
 #endif
+
+/* This implementation is derived from the approximation approach of Error Function (Erf) from PyTorch
+   https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220
+*/
+
+#ifndef OPENCV_HAL_MATH_HAVE_ERF
+
+//! @name Error Function
+//! @{
+
+    inline v_float32 v_erf(v_float32 v) {
+        const v_float32 coef0 = vx_setall_f32(0.3275911f),
+                        coef1 = vx_setall_f32(1.061405429f),
+                        coef2 = vx_setall_f32(-1.453152027f),
+                        coef3 = vx_setall_f32(1.421413741f),
+                        coef4 = vx_setall_f32(-0.284496736f),
+                        coef5 = vx_setall_f32(0.254829592f),
+                        ones = vx_setall_f32(1.0f),
+                        neg_zeros = vx_setall_f32(-0.f);
+        v_float32 t = v_abs(v);
+        // sign(v)
+        v_float32 sign_mask = v_and(neg_zeros, v);
+
+        t = v_div(ones, v_fma(coef0, t, ones));
+        v_float32 r = v_fma(coef1, t, coef2);
+        r = v_fma(r, t, coef3);
+        r = v_fma(r, t, coef4);
+        r = v_fma(r, t, coef5);
+        // - v * v
+        v_float32 pow_2 = v_mul(v, v);
+        v_float32 neg_pow_2 = v_xor(neg_zeros, pow_2);
+        // - exp(- v * v)
+        v_float32 exp = v_exp(neg_pow_2);
+        v_float32 neg_exp = v_xor(neg_zeros, exp);
+        v_float32 res = v_mul(t, neg_exp);
+        res = v_fma(r, res, ones);
+        return v_xor(sign_mask, res);
+    }
+
+#define OPENCV_HAL_MATH_HAVE_ERF 1
+//! @}
+
+#endif // OPENCV_HAL_MATH_HAVE_ERF
+
+
 }
 #endif  // OPENCV_HAL_INTRIN_HPP
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 4893e64ba8..742136f84c 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1864,6 +1864,48 @@ template<typename R> struct TheTest
 #endif
         return *this;
     }
+
+    TheTest &test_erf_fp32() {
+        int n = VTraits<R>::vlanes();
+
+        constexpr int num_loops = 10000;
+        const std::vector<LaneType> singular_inputs{INFINITY, -INFINITY, NAN};
+        constexpr double insert_singular_input_probability = 0.1;
+        cv::RNG_MT19937 rng;
+
+        for (int i = 0; i < num_loops; i++) {
+            Data<R> inputs;
+            for (int j = 0; j < n; j++) {
+                if (rng.uniform(0.f, 1.f) <= insert_singular_input_probability) {
+                    int singular_input_index = rng.uniform(0, int(singular_inputs.size()));
+                    inputs[j] = singular_inputs[singular_input_index];
+                } else {
+                    // std::exp(float) overflows at about 88.0f.
+                    // In v_erf, exp is called on input*input. So test range is [-sqrt(88.0f), sqrt(88.0f)]
+                    inputs[j] = (LaneType) rng.uniform(-9.4f, 9.4f);
+                }
+            }
+
+            Data<R> outputs = v_erf(R(inputs));
+            for (int j = 0; j < n; j++) {
+                SCOPED_TRACE(cv::format("Random test value: %f", inputs[j]));
+                if (std::isinf(inputs[j])) {
+                    if (inputs[j] < 0) {
+                        EXPECT_EQ(-1, outputs[j]);
+                    } else {
+                        EXPECT_EQ(1, outputs[j]);
+                    }
+                } else if (std::isnan(inputs[j])) {
+                    EXPECT_TRUE(std::isnan(outputs[j]));
+                } else {
+                    LaneType ref_output = std::erf(inputs[j]);
+                    EXPECT_LT(std::abs(outputs[j] - ref_output), 1e-3f * (std::abs(ref_output) + FLT_MIN * 1e4f));
+                }
+            }
+        }
+
+        return *this;
+    }
 };
 
 #define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits<v_uint8>::vlanes(), CV__TRACE_FUNCTION);
@@ -2179,6 +2221,7 @@ void test_hal_intrin_float32()
         .test_pack_triplets()
         .test_exp_fp32()
         .test_log_fp32()
+        .test_erf_fp32()
 #if CV_SIMD_WIDTH == 32
         .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
         .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()

From d7a237aefc76f841dba64544d09ab6df53097206 Mon Sep 17 00:00:00 2001
From: Amir Hassan <amir@viel-zu.org>
Date: Fri, 5 Jul 2024 15:39:01 +0200
Subject: [PATCH 20/39] Merge pull request #22836 from
 kallaballa:opengl_cmake_warning_linux

Explicitly prefer legacy GL in cmake on Linux? #22836

Pertaining Issue: #22835

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 cmake/OpenCVFindLibsGUI.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake
index 31c19b1da7..fca3e2c52d 100644
--- a/cmake/OpenCVFindLibsGUI.cmake
+++ b/cmake/OpenCVFindLibsGUI.cmake
@@ -60,6 +60,7 @@ if(WITH_QT)
 endif()
 
 # --- OpenGl ---
+ocv_update(OpenGL_GL_PREFERENCE LEGACY)
 ocv_clear_vars(HAVE_OPENGL HAVE_QT_OPENGL)
 if(WITH_OPENGL)
   if(WITH_WIN32UI OR (HAVE_QT AND QT_QTOPENGL_FOUND) OR HAVE_GTKGLEXT)

From 0b5b40179cadad6dbf0141a37f51c9a6b6c8b3fc Mon Sep 17 00:00:00 2001
From: Kumataro <Kumataro@users.noreply.github.com>
Date: Sun, 7 Jul 2024 07:15:28 +0900
Subject: [PATCH 21/39] calib3d: doc: enable line breaks in formulas

---
 modules/calib3d/include/opencv2/calib3d.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index 0280e05e21..aedbaf930a 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -412,11 +412,11 @@ R & t \\
     where R is the rotation matrix corresponding to the rotation vector om: R = rodrigues(om); call x, y
     and z the 3 coordinates of Xc:
 
-    \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f]
+    \f[\begin{array}{l} x = Xc_1 \\ y = Xc_2 \\ z = Xc_3 \end{array} \f]
 
     The pinhole projection coordinates of P is [a; b] where
 
-    \f[a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r)\f]
+    \f[\begin{array}{l} a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r) \end{array} \f]
 
     Fisheye distortion:
 
@@ -424,12 +424,12 @@ R & t \\
 
     The distorted point coordinates are [x'; y'] where
 
-    \f[x' = (\theta_d / r) a \\ y' = (\theta_d / r) b \f]
+    \f[\begin{array}{l} x' = (\theta_d / r) a \\ y' = (\theta_d / r) b \end{array} \f]
 
     Finally, conversion into pixel coordinates: The final pixel coordinates vector [u; v] where:
 
-    \f[u = f_x (x' + \alpha y') + c_x \\
-    v = f_y y' + c_y\f]
+    \f[\begin{array}{l} u = f_x (x' + \alpha y') + c_x \\
+    v = f_y y' + c_y \end{array} \f]
 
     Summary:
     Generic camera model @cite Kannala2006 with perspective projection and without distortion correction

From 31b308f8822a2b77aba46b1a93366c2c104e841b Mon Sep 17 00:00:00 2001
From: Dmitry Yurov <22917299+DmitryYurov@users.noreply.github.com>
Date: Mon, 8 Jul 2024 11:36:56 +0200
Subject: [PATCH 22/39] Merge pull request #25808 from
 DmitryYurov:bug-25806-checkerboard-marker-black-tile

Enable checkerboard detection with a central / corner marker on a black tile #25808

This pull request closes the issue #25806.

The issue doesn't require any documentation - it's quite intuitive that the detection result shouldn't depend on the color of the marker's tile.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 modules/calib3d/src/chessboard.cpp         | 48 ++++++++++------------
 modules/calib3d/test/test_chesscorners.cpp | 20 +++++++++
 2 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/modules/calib3d/src/chessboard.cpp b/modules/calib3d/src/chessboard.cpp
index 1801a1cbf4..ffbddadf40 100644
--- a/modules/calib3d/src/chessboard.cpp
+++ b/modules/calib3d/src/chessboard.cpp
@@ -1625,36 +1625,30 @@ bool Chessboard::Board::normalizeMarkerOrientation()
             if(!current_cell->marker || !current_cell->right || !current_cell->right->marker)
                 continue;
 
-            if(current_cell->black)
+            if(current_cell->right->top && current_cell->right->top->marker)
             {
-                if(current_cell->right->top && current_cell->right->top->marker)
-                {
-                    rotateLeft();
-                    rotateLeft();
-                    pcell = current_cell->right;
-                    break;
-                }
-                if(current_cell->right->bottom && current_cell->right->bottom->marker)
-                {
-                    rotateLeft();
-                    pcell = current_cell->right;
-                    break;
-                }
+                rotateLeft();
+                rotateLeft();
+                pcell = current_cell->right;
+                break;
             }
-            else
+            if(current_cell->right->bottom && current_cell->right->bottom->marker)
             {
-                if(current_cell->top && current_cell->top->marker)
-                {
-                    rotateRight();
-                    pcell = current_cell;
-                    break;
-                }
-                if(current_cell->bottom && current_cell->bottom->marker)
-                {
-                    // correct orientation
-                    pcell = current_cell;
-                    break;
-                }
+                rotateLeft();
+                pcell = current_cell->right;
+                break;
+            }
+            if(current_cell->top && current_cell->top->marker)
+            {
+                rotateRight();
+                pcell = current_cell;
+                break;
+            }
+            if(current_cell->bottom && current_cell->bottom->marker)
+            {
+                // correct orientation
+                pcell = current_cell;
+                break;
             }
         }
     }
diff --git a/modules/calib3d/test/test_chesscorners.cpp b/modules/calib3d/test/test_chesscorners.cpp
index a63d5b3e83..3d13602780 100644
--- a/modules/calib3d/test/test_chesscorners.cpp
+++ b/modules/calib3d/test/test_chesscorners.cpp
@@ -679,6 +679,26 @@ TEST(Calib3d_AsymmetricCirclesPatternDetector, accuracy) { CV_ChessboardDetector
 TEST(Calib3d_AsymmetricCirclesPatternDetectorWithClustering, accuracy) { CV_ChessboardDetectorTest test( ASYMMETRIC_CIRCLES_GRID, CALIB_CB_CLUSTERING ); test.safe_run(); }
 #endif
 
+TEST(Calib3d_ChessboardWithMarkers, regression_25806_white)
+{
+    const cv::String dataDir = string(TS::ptr()->get_data_path()) + "cv/cameracalibration/";
+    const cv::Mat image = cv::imread(dataDir + "checkerboard_marker_white.png");
+
+    std::vector<Point2f> corners;
+    const bool success = cv::findChessboardCornersSB(image, Size(9, 14), corners, CALIB_CB_MARKER);
+    ASSERT_TRUE(success);
+}
+
+TEST(Calib3d_ChessboardWithMarkers, regression_25806_black)
+{
+    const cv::String dataDir = string(TS::ptr()->get_data_path()) + "cv/cameracalibration/";
+    const cv::Mat image = cv::imread(dataDir + "checkerboard_marker_black.png");
+
+    std::vector<Point2f> corners;
+    const bool success = cv::findChessboardCornersSB(image, Size(9, 14), corners, CALIB_CB_MARKER);
+    ASSERT_TRUE(success);
+}
+
 TEST(Calib3d_CirclesPatternDetectorWithClustering, accuracy)
 {
     cv::String dataDir = string(TS::ptr()->get_data_path()) + "cv/cameracalibration/circles/";

From e3858cc5a39127d098879f7ebca144a280bff3cb Mon Sep 17 00:00:00 2001
From: Yuantao Feng <yuantao.feng@opencv.org.cn>
Date: Mon, 8 Jul 2024 19:24:36 +0800
Subject: [PATCH 23/39] Merge pull request #25147 from
 fengyuentau:dnn/elementwise_layers/speedup

* added v_erf and implemented gelu acceleration via vectorization

* remove anonymous v_erf and use v_erf from intrin_math

* enable perf for ov and cuda backend
---
 modules/dnn/perf/perf_layer.cpp               |  45 ++++++
 modules/dnn/src/layers/elementwise_layers.cpp | 131 ++++++++++++++++--
 modules/dnn/src/opencl/activations.cl         |   4 +
 modules/dnn/test/test_onnx_conformance.cpp    |  44 ++++++
 ...conformance_layer_filter__openvino.inl.hpp |  30 ++++
 ..._conformance_layer_parser_denylist.inl.hpp |   4 +
 modules/dnn/test/test_onnx_importer.cpp       |   9 ++
 7 files changed, 254 insertions(+), 13 deletions(-)

diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp
index acdc778b3c..ea1e70ae30 100644
--- a/modules/dnn/perf/perf_layer.cpp
+++ b/modules/dnn/perf/perf_layer.cpp
@@ -975,4 +975,49 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
                           /* withCann= */            false) // only test on CPU
 ));
 
+using Layer_Elementwise = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
+PERF_TEST_P_(Layer_Elementwise, elementwise) {
+    std::vector<int> input_shape = get<0>(GetParam());
+    std::string op = get<1>(GetParam());
+    int backend_id = get<0>(get<2>(GetParam()));
+    int target_id = get<1>(get<2>(GetParam()));
+
+    Mat input(input_shape, CV_32F);
+    randn(input, 0.f, 1.f);
+
+    LayerParams lp;
+    lp.type = op;
+    lp.name = "TestLayer";
+
+    Net net;
+    net.addLayerToPrev(lp.name, lp.type, lp);
+
+    // Warmup
+    {
+        net.setInput(input);
+        net.setPreferableBackend(backend_id);
+        net.setPreferableTarget(target_id);
+        Mat out = net.forward();
+    }
+
+    TEST_CYCLE() {
+        net.forward();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise, testing::Combine(
+    testing::Values(std::vector<int>{1, 50, 3072}),
+    testing::Values(std::string{"Gelu"}),
+    dnnBackendsAndTargets(/* withInferenceEngine= */ true,
+                          /* withHalide= */          false,
+                          /* withCpuOCV= */          true,
+                          /* withVkCom= */           false,
+                          /* withCUDA= */            true,
+                          /* withNgraph= */          true,
+                          /* withWebnn= */           false,
+                          /* withCann= */            false) // only test on CPU
+));
+
 } // namespace
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index 6b7909b1b7..477aad88be 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -813,20 +813,82 @@ private:
     static const char* const ocl_kernel_name;
 };
 
-struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
-{
-    typedef GeluLayer Layer;
+namespace {
+    // Refer to v_erf in modules/core/include/opencv2/core/hal/intrin_math.hpp
+    constexpr float c_erf_coef0 = 0.3275911f;
+    constexpr float c_erf_coef1 = 1.061405429f;
+    constexpr float c_erf_coef2 = -1.453152027f;
+    constexpr float c_erf_coef3 = 1.421413741f;
+    constexpr float c_erf_coef4 = -0.284496736f;
+    constexpr float c_erf_coef5 = 0.254829592f;
+
+    inline float erf_approx(float v) {
+        float t = 1.f / fmaf(fabsf(v), c_erf_coef0, 1.f);
+        float r = fmaf(c_erf_coef1, t, c_erf_coef2);
+        r = fmaf(r, t, c_erf_coef3);
+        r = fmaf(r, t, c_erf_coef4);
+        r = fmaf(r, t, c_erf_coef5);
+        r = 1.f - r * t * expf(-v * v);
+        return std::copysignf(r, v);
+    }
+}
 
-    explicit GeluFunctor() {}
+struct GeluFunctor : public BaseFunctor {
+    using Layer = GeluLayer;
+    int vlanes;
 
-    bool supportBackend(int backendId, int)
-    {
-        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
+    explicit GeluFunctor() {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
     }
 
-    inline float calculate(float x) const
-    {
-        return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
+    bool supportBackend(int backendId, int) {
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            // 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
+            v_float32 half = vx_setall_f32(0.5f),
+                      one = vx_setall_f32(1.0f),
+                      reciprocal_sqrt2 = vx_setall_f32(M_SQRT1_2);
+            for (; i <= len - vlanes; i += vlanes) {
+                if (i + vlanes > len) {
+                    if (i == 0 || i == len) {
+                        break;
+                    }
+                    i = len - vlanes;
+                }
+                v_float32 x0 = vx_load(srcptr + i);
+
+                // t = x * M_SQRT1_2
+                v_float32 t0 = v_mul(reciprocal_sqrt2, x0);
+
+                // t = 1.0f + t
+                t0 = v_add(one, v_erf(t0));
+
+                // x = 0.5 * x
+                x0 = v_mul(half, x0);
+
+                // x = x * t
+                x0 = v_mul(x0, t0);
+
+                vx_store(dstptr + i, x0);
+            }
+#endif
+            // 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
+            for( ; i < len; i++ )
+            {
+                float x = srcptr[i];
+                dstptr[i] = 0.5f * x * (1.0f + erf_approx(x * M_SQRT1_2));
+            }
+        }
     }
 
 #ifdef HAVE_CUDA
@@ -836,12 +898,55 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
     }
 #endif
 
+#ifdef HAVE_OPENCL
+    bool initKernel(ocl::Kernel &ker, const UMat &src) const
+    {
+        String buildopt = oclGetTMacro(src);
+
+        if (!ker.create("GeluForward", ocl::dnn::activations_oclsrc, buildopt))
+            return false;
+
+        return true;
+    }
+
+    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+            CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
+
+            ocl::Kernel kernel;
+            CV_Assert(initKernel(kernel, src));
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
+    }
+#endif
+
+#ifdef HAVE_DNN_NGRAPH
+    std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
+    {
+        return std::make_shared<ov::op::v0::Gelu>(node);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     int64 getFLOPSPerElement() const { return 100; }
 };
 
-template<>
-const char* const BaseDefaultFunctor<GeluFunctor>::ocl_kernel_name = "GeluForward";
-
 namespace GeluApproximationConstants
 {
     static constexpr float sqrt_2_pi = 0.7978845834732056f;
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl
index 96b56725fb..bbd03b2ea1 100644
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -48,6 +48,10 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #endif
 
+#if !defined(M_SQRT1_2)
+#define M_SQRT1_2   0.707106781186547524400844362104849039  /* 1/sqrt(2)      */
+#endif
+
 __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #ifndef RELU_NO_SLOPE
 , KERNEL_ARG_DTYPE negative_slope
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index bd892adb2f..8af34695dd 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -282,6 +282,14 @@ static const TestCase testConformanceConfig[] = {
     {"test_gathernd_example_float32", 2, 1},
     {"test_gathernd_example_int32", 2, 1},
     {"test_gathernd_example_int32_batch_dim1", 2, 1},
+    {"test_gelu_default_1", 1, 1},
+    {"test_gelu_default_1_expanded", 1, 1},
+    {"test_gelu_default_2", 1, 1},
+    {"test_gelu_default_2_expanded", 1, 1},
+    {"test_gelu_tanh_1", 1, 1},
+    {"test_gelu_tanh_1_expanded", 1, 1},
+    {"test_gelu_tanh_2", 1, 1},
+    {"test_gelu_tanh_2_expanded", 1, 1},
     {"test_gemm_all_attributes", 3, 1},
     {"test_gemm_alpha", 3, 1},
     {"test_gemm_beta", 3, 1},
@@ -1123,6 +1131,19 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         if (name == "test_pow") {
             default_lInf = 0.00013; // Expected: (normInf) <= (lInf), actual: 0.00012207 vs 0.0001
         }
+        if (name == "test_gelu_tanh_1") {
+            default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
+            default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
+        }
+        if (name == "test_gelu_tanh_2") {
+            if (target == DNN_TARGET_OPENCL_FP16) {
+                default_l1 = 0.00016; // Expected: (normL1) <= (l1), actual: 0.000157223 vs 9e-05
+                default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00153041 vs 0.0005
+            } else {
+                default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80073e-05 vs 1e-05
+                default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
+            }
+        }
     }
 #ifdef HAVE_HALIDE
     else if (backend == DNN_BACKEND_HALIDE)
@@ -1146,6 +1167,15 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         {
             applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
         }
+
+        if (name == "test_gelu_tanh_1") {
+            default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
+            default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
+        }
+        if (name == "test_gelu_tanh_2") {
+            default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80073e-05 vs 1e-05
+            default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
+        }
     }
 #endif
 #ifdef HAVE_CUDA
@@ -1159,6 +1189,20 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
         {
             applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
         }
+
+        if (name == "test_gelu_tanh_1") {
+            default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101815 vs 1e-05
+            default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152737 vs 0.0001
+        }
+        if (name == "test_gelu_tanh_2") {
+            if (target == DNN_TARGET_CUDA_FP16) {
+                default_l1 = 0.00023; // Expected: (normL1) <= (l1), actual: 0.000220591 vs 9e-05
+                default_lInf = 0.0023; // Expected: (normInf) <= (lInf), actual: 0.00220466 vs 0.0005
+            } else {
+                default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80127e-05 vs 1e-05
+                default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455445 vs 0.0001
+            }
+        }
     }
 #endif
     else
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
index 229bb9ca82..cbbc349bda 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
@@ -688,6 +688,36 @@ CASE(test_gathernd_example_int32)
     // no filter
 CASE(test_gathernd_example_int32_batch_dim1)
     // no filter
+CASE(test_gelu_default_1)
+    // no filter
+CASE(test_gelu_default_1_expanded)
+    // no filter
+CASE(test_gelu_default_2)
+    // no filter
+CASE(test_gelu_default_2_expanded)
+    // no filter
+CASE(test_gelu_tanh_1)
+    if (target == DNN_TARGET_CPU) {
+        default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
+        default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
+    }
+    if (target == DNN_TARGET_OPENCL) {
+        default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101815 vs 1e-05
+        default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152737 vs 0.0001
+    }
+CASE(test_gelu_tanh_1_expanded)
+    // no filter
+CASE(test_gelu_tanh_2)
+    if (target == DNN_TARGET_CPU) {
+        default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80057e-05 vs 1e-05
+        default_lInf = 0.00046; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
+    }
+    if (target == DNN_TARGET_OPENCL) {
+        default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80144e-05 vs 1e-05
+        default_lInf = 0.00046; // Expected: (normInf) <= (lInf), actual: 0.000455445 vs 0.0001
+    }
+CASE(test_gelu_tanh_2_expanded)
+    // no filter
 CASE(test_gemm_all_attributes)
     // no filter
 CASE(test_gemm_alpha)
diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
index 243c7e704d..7253a64cef 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
@@ -117,6 +117,10 @@
 "test_gathernd_example_float32",
 "test_gathernd_example_int32",
 "test_gathernd_example_int32_batch_dim1",
+"test_gelu_default_1_expanded", // parser: no corresponding layer for CastLike
+"test_gelu_default_2_expanded", // parser: no corresponding layer for CastLike
+"test_gelu_tanh_1_expanded", // parser: no corresponding layer for CastLike
+"test_gelu_tanh_2_expanded", // parser: no corresponding layer for CastLike
 "test_gemm_all_attributes",
 "test_gemm_alpha",
 "test_gemm_beta",
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index e560ff2dbe..e58d83cdbd 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -3149,6 +3149,15 @@ TEST_P(Test_ONNX_nets, ViT_B_32) {
         l1 = 0.008;
         lInf = 0.04;
     }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        if (target == DNN_TARGET_CPU) {
+            l1 = 4.4e-5; // Expected: (normL1) <= (l1), actual: 4.31208e-05 vs 1e-05
+            lInf = 0.0002; // Expected: (normInf) <= (lInf), actual: 0.000194907 vs 0.0001
+        } else if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
+            l1 = 0.0092; // Expected: (normL1) <= (l1), actual: 0.00918349 vs 4.4e-05
+            lInf = 0.056; // Expected: (normInf) <= (lInf), actual: 0.0556431 vs 0.0002
+        }
+    }
 
     normAssert(ref, out, "ViTB_32", l1, lInf);
 }

From e962395565f8320743fbba0112b6a30f91636139 Mon Sep 17 00:00:00 2001
From: Sourav Kumar <120493586+Sourav6971@users.noreply.github.com>
Date: Tue, 9 Jul 2024 06:53:16 +0530
Subject: [PATCH 24/39] Update imgcodecs.hpp

---
 modules/imgcodecs/include/opencv2/imgcodecs.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index c3a1d4b082..7441634a25 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -293,7 +293,7 @@ The function imreadmulti loads a multi-page image from the specified file into a
 */
 CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& mats, int flags = IMREAD_ANYCOLOR);
 
-/** @brief Loads a of images of a multi-page image from a file.
+/** @brief Loads images of a multi-page image from a file.
 
 The function imreadmulti loads a specified range from a multi-page image from the specified file into a vector of Mat objects.
 @param filename Name of file to be loaded.
@@ -305,7 +305,7 @@ The function imreadmulti loads a specified range from a multi-page image from th
 */
 CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& mats, int start, int count, int flags = IMREAD_ANYCOLOR);
 
-/** @brief Returns the number of images inside the give file
+/** @brief Returns the number of images inside the given file
 
 The function imcount will return the number of pages in a multi-page image, or 1 for single-page images
 @param filename Name of file to be loaded.

From b9649435173dba9355164cde5d8a958e93fb9b91 Mon Sep 17 00:00:00 2001
From: Mironov Arseny <98156294+Fest1veNapkin@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:11:23 +0400
Subject: [PATCH 25/39] Merge pull request #25607 from
 Fest1veNapkin:imgproc_approx_bounding_poly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new function that approximates the polygon bounding a convex hull with a certain number of sides #25607

merge PR with <https://github.com/opencv/opencv_extra/pull/1179>

This PR is based on the paper [View Frustum Optimization To Maximize Object’s Image Area](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=1fbd43f3827fffeb76641a9c5ab5b625eb5a75ba).

# Problem
I needed to reduce the number of vertices of the convex hull so that the additional area was minimal, andall vertices of the original contour enter the new contour.

![image](https://github.com/Fest1veNapkin/opencv/assets/98156294/efac35f6-b8f0-46ec-91e4-60800432620c)

![image](https://github.com/Fest1veNapkin/opencv/assets/98156294/2292d9d7-1c10-49c9-8489-23221b4b28f7)

# Description
Initially in the contour of n vertices, at each stage we consider the intersection points of the lines formed by each adjacent edges. Each of these intersection points will form a triangle with vertices through which lines pass. Let's choose a triangle with the minimum area and merge the two vertices at the intersection point. We continue until there are more vertices than the specified number of sides of the approximated polygon.
![image](https://github.com/Fest1veNapkin/opencv/assets/98156294/b87b21c4-112e-450d-a776-2a120048ca30)

# Complexity:
Using a std::priority_queue or std::set  time complexity is **(O(n\*ln(n))**, memory **O(n)**,
n - number of vertices in convex hull.

count of sides - the number of points by which we must reduce.
![image](https://github.com/Fest1veNapkin/opencv/assets/98156294/31ad5562-a67d-4e3c-bdc2-29f8b52caf88)

## Comment
If epsilon_percentage more 0, algorithm can return more values than _side_.
Algorithm returns OutputArray. If OutputArray.type() equals 0, algorithm returns values with InputArray.type().
New test uses image which are not in opencv_extra, needs to be added.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 doc/opencv.bib                              |   9 +
 modules/imgproc/include/opencv2/imgproc.hpp |  22 ++
 modules/imgproc/src/approx.cpp              | 237 ++++++++++++++++++++
 modules/imgproc/test/test_approxpoly.cpp    |  75 +++++++
 4 files changed, 343 insertions(+)

diff --git a/doc/opencv.bib b/doc/opencv.bib
index 6632271e4a..5531bb6dd5 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -1467,3 +1467,12 @@
   volume = {60},
   journal = {ISPRS Journal of Photogrammetry and Remote Sensing}
 }
+@article{LowIlie2003,
+  author   = {Kok-Lim Low, Adrian Ilie},
+  year = {2003},
+  pages = {3-15},
+  title    = {View Frustum Optimization to Maximize Object's Image Area},
+  journal  = {Journal of Graphics, (GPU, & Game) Tools (JGT)},
+  volume = {8},
+  url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=1fbd43f3827fffeb76641a9c5ab5b625eb5a75ba}
+}
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index a4852e80db..2f3c6f344f 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -4058,6 +4058,28 @@ CV_EXPORTS_W void approxPolyDP( InputArray curve,
                                 OutputArray approxCurve,
                                 double epsilon, bool closed );
 
+/** @brief Approximates a polygon with a convex hull with a specified accuracy and number of sides.
+
+The cv::approxPolyN function approximates a polygon with a convex hull
+so that the difference between the contour area of the original contour and the new polygon is minimal.
+It uses a greedy algorithm for contracting two vertices into one in such a way that the additional area is minimal.
+Straight lines formed by each edge of the convex contour are drawn and the areas of the resulting triangles are considered.
+Each vertex will lie either on the original contour or outside it.
+
+The algorithm based on the paper @cite LowIlie2003 .
+
+@param curve Input vector of a 2D points stored in std::vector or Mat, points must be float or integer.
+@param approxCurve Result of the approximation. The type is vector of a 2D point (Point2f or Point) in std::vector or Mat.
+@param nsides The parameter defines the number of sides of the result polygon.
+@param epsilon_percentage defines the percentage of the maximum of additional area.
+If it equals -1, it is not used. Otherwise algorighm stops if additional area is greater than contourArea(_curve) * percentage.
+If additional area exceeds the limit, algorithm returns as many vertices as there were at the moment the limit was exceeded.
+@param ensure_convex If it is true, algorithm creates a convex hull of input contour. Otherwise input vector should be convex.
+ */
+CV_EXPORTS_W void approxPolyN(InputArray curve, OutputArray approxCurve,
+                              int nsides, float epsilon_percentage = -1.0,
+                              bool ensure_convex = true);
+
 /** @brief Calculates a contour perimeter or a curve length.
 
 The function computes a curve length or a closed contour perimeter.
diff --git a/modules/imgproc/src/approx.cpp b/modules/imgproc/src/approx.cpp
index f05a6bcf3c..6c89f1d879 100644
--- a/modules/imgproc/src/approx.cpp
+++ b/modules/imgproc/src/approx.cpp
@@ -39,6 +39,7 @@
 //
 //M*/
 #include "precomp.hpp"
+#include <queue>
 
 /****************************************************************************************\
 *                                  Chain Approximation                                   *
@@ -860,4 +861,240 @@ cvApproxPoly( const void* array, int header_size,
     return dst_seq;
 }
 
+enum class PointStatus : int8_t
+{
+    REMOVED = -1,
+    RECALCULATE = 0,
+    CALCULATED = 1
+};
+
+struct neighbours
+{
+    PointStatus pointStatus;
+    cv::Point2f point;
+    int next;
+    int prev;
+
+    explicit neighbours(int next_ = -1, int prev_ = -1, const cv::Point2f& point_ = { -1, -1 })
+    {
+        next = next_;
+        prev = prev_;
+        point = point_;
+        pointStatus = PointStatus::CALCULATED;
+    }
+};
+
+struct changes
+{
+    float area;
+    int vertex;
+    cv::Point2f intersection;
+
+    explicit changes(float area_, int vertex_, const cv::Point2f& intersection_)
+    {
+        area = area_;
+        vertex = vertex_;
+        intersection = intersection_;
+    }
+
+    bool operator < (const changes& elem) const
+    {
+        return (area < elem.area) || ((area == elem.area) && (vertex < elem.vertex));
+    }
+    bool operator > (const changes& elem) const
+    {
+        return (area > elem.area) || ((area == elem.area) && (vertex > elem.vertex));
+    }
+};
+
+/*
+  returns intersection point and extra area
+*/
+static void recalculation(std::vector<neighbours>& hull, int vertex_id, float& area_, float& x, float& y)
+{
+    cv::Point2f vertex = hull[vertex_id].point,
+        next_vertex = hull[hull[vertex_id].next].point,
+        extra_vertex_1 = hull[hull[vertex_id].prev].point,
+        extra_vertex_2 = hull[hull[hull[vertex_id].next].next].point;
+
+    cv::Point2f curr_edge = next_vertex - vertex,
+        prev_edge = vertex - extra_vertex_1,
+        next_edge = extra_vertex_2 - next_vertex;
+
+    float cross = prev_edge.x * next_edge.y - prev_edge.y * next_edge.x;
+    if (abs(cross) < 1e-8)
+    {
+        area_ = FLT_MAX;
+        x = -1;
+        y = -1;
+        return;
+    }
+
+    float t = (curr_edge.x * next_edge.y - curr_edge.y * next_edge.x) / cross;
+    cv::Point2f intersection = vertex + cv::Point2f(prev_edge.x * t, prev_edge.y * t);
+
+    float area = 0.5f * abs((next_vertex.x - vertex.x) * (intersection.y - vertex.y)
+        - (intersection.x - vertex.x) * (next_vertex.y - vertex.y));
+
+    area_ = area;
+    x = intersection.x;
+    y = intersection.y;
+}
+
+static void update(std::vector<neighbours>& hull, int vertex_id)
+{
+    neighbours& v1 = hull[vertex_id], & removed = hull[v1.next], & v2 = hull[removed.next];
+
+    removed.pointStatus = PointStatus::REMOVED;
+    v1.pointStatus = PointStatus::RECALCULATE;
+    v2.pointStatus = PointStatus::RECALCULATE;
+    hull[v1.prev].pointStatus = PointStatus::RECALCULATE;
+    v1.next = removed.next;
+    v2.prev = removed.prev;
+}
+
+/*
+    A greedy algorithm based on contraction of vertices for approximating a convex contour by a bounding polygon
+*/
+void cv::approxPolyN(InputArray _curve, OutputArray _approxCurve,
+    int nsides, float epsilon_percentage, bool ensure_convex)
+{
+    CV_INSTRUMENT_REGION();
+
+    CV_Assert(epsilon_percentage > 0 || epsilon_percentage == -1);
+    CV_Assert(nsides > 2);
+
+    if (_approxCurve.fixedType())
+    {
+        CV_Assert(_approxCurve.type() == CV_32FC2 || _approxCurve.type() == CV_32SC2);
+    }
+
+    Mat curve;
+    int depth = _curve.depth();
+
+    CV_Assert(depth == CV_32F || depth == CV_32S);
+
+    if (ensure_convex)
+    {
+        cv::convexHull(_curve, curve);
+    }
+    else
+    {
+        CV_Assert(isContourConvex(_curve));
+        curve = _curve.getMat();
+    }
+
+    CV_Assert((curve.cols == 1 && curve.rows >= nsides)
+        || (curve.rows == 1 && curve.cols >= nsides));
+
+    if (curve.rows == 1)
+    {
+        curve = curve.reshape(0, curve.cols);
+    }
+
+    std::vector<neighbours> hull(curve.rows);
+    int size = curve.rows;
+    std::priority_queue<changes, std::vector<changes>, std::greater<changes>> areas;
+    float extra_area = 0, max_extra_area = epsilon_percentage * static_cast<float>(contourArea(_curve));
+
+    if (curve.depth() == CV_32S)
+    {
+        for (int i = 0; i < size; ++i)
+        {
+            Point t = curve.at<cv::Point>(i, 0);
+            hull[i] = neighbours(i + 1, i - 1, Point2f(static_cast<float>(t.x), static_cast<float>(t.y)));
+        }
+    }
+    else
+    {
+        for (int i = 0; i < size; ++i)
+        {
+            Point2f t = curve.at<cv::Point2f>(i, 0);
+            hull[i] = neighbours(i + 1, i - 1, t);
+        }
+    }
+    hull[0].prev = size - 1;
+    hull[size - 1].next = 0;
+
+    if (size > nsides)
+    {
+        for (int vertex_id = 0; vertex_id < size; ++vertex_id)
+        {
+            float area, new_x, new_y;
+            recalculation(hull, vertex_id, area, new_x, new_y);
+
+            areas.push(changes(area, vertex_id, Point2f(new_x, new_y)));
+        }
+    }
+
+    while (size > nsides)
+    {
+        changes base = areas.top();
+        int vertex_id = base.vertex;
+
+        if (hull[vertex_id].pointStatus == PointStatus::REMOVED)
+        {
+            areas.pop();
+        }
+        else if (hull[vertex_id].pointStatus == PointStatus::RECALCULATE)
+        {
+            float area, new_x, new_y;
+            areas.pop();
+            recalculation(hull, vertex_id, area, new_x, new_y);
+
+            areas.push(changes(area, vertex_id, Point2f(new_x, new_y)));
+            hull[vertex_id].pointStatus = PointStatus::CALCULATED;
+        }
+        else
+        {
+            if (epsilon_percentage != -1)
+            {
+                extra_area += base.area;
+                if (extra_area > max_extra_area)
+                {
+                    break;
+                }
+            }
+
+            size--;
+            hull[vertex_id].point = base.intersection;
+            update(hull, vertex_id);
+        }
+    }
+
+    if (_approxCurve.fixedType())
+    {
+        depth = _approxCurve.depth();
+    }
+    _approxCurve.create(1, size, CV_MAKETYPE(depth, 2));
+    Mat buf = _approxCurve.getMat();
+    int last_free = 0;
+
+    if (depth == CV_32S)
+    {
+        for (int i = 0; i < curve.rows; ++i)
+        {
+            if (hull[i].pointStatus != PointStatus::REMOVED)
+            {
+                Point t = Point(static_cast<int>(round(hull[i].point.x)),
+                                static_cast<int>(round(hull[i].point.y)));
+
+                buf.at<Point>(0, last_free) = t;
+                last_free++;
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < curve.rows; ++i)
+        {
+            if (hull[i].pointStatus != PointStatus::REMOVED)
+            {
+                buf.at<Point2f>(0, last_free) = hull[i].point;
+                last_free++;
+            }
+        }
+    }
+}
+
 /* End of file. */
diff --git a/modules/imgproc/test/test_approxpoly.cpp b/modules/imgproc/test/test_approxpoly.cpp
index f09475c9fc..2b07c1a7b5 100644
--- a/modules/imgproc/test/test_approxpoly.cpp
+++ b/modules/imgproc/test/test_approxpoly.cpp
@@ -377,4 +377,79 @@ TEST(Imgproc_ApproxPoly, bad_epsilon)
     ASSERT_ANY_THROW(approxPolyDP(inputPoints, outputPoints, eps, false));
 }
 
+struct ApproxPolyN: public testing::Test
+{
+    void SetUp()
+    {
+        vector<vector<Point>> inputPoints = {
+            {  {87, 103}, {100, 112}, {96, 138}, {80, 169}, {60, 183}, {38, 176}, {41, 145}, {56, 118}, {76, 104} },
+            {  {196, 102}, {205, 118}, {174, 196}, {152, 207}, {102, 194}, {100, 175}, {131, 109} },
+            {  {372, 101}, {377, 119}, {337, 238}, {324, 248}, {240, 229}, {199, 214}, {232, 123}, {245, 103} },
+            {  {463, 86}, {563, 112}, {574, 135}, {596, 221}, {518, 298}, {412, 266}, {385, 164}, {462, 86} }
+        };
+
+        Mat image(600, 600, CV_8UC1, Scalar(0));
+
+        for (vector<Point>& polygon : inputPoints) {
+            polylines(image, { polygon }, true, Scalar(255), 1);
+        }
+
+        findContours(image, contours, RETR_LIST, CHAIN_APPROX_NONE);
+    }
+
+    vector<vector<Point>> contours;
+};
+
+TEST_F(ApproxPolyN, accuracyInt)
+{
+    vector<vector<Point>> rightCorners = {
+        { {72, 187}, {37, 176}, {42, 127}, {133, 64} },
+        { {168, 212}, {92, 192}, {131, 109}, {213, 100} },
+        { {72, 187}, {37, 176}, {42, 127}, {133, 64} },
+        { {384, 100}, {333, 251}, {197, 220}, {239, 103} },
+        { {168, 212}, {92, 192}, {131, 109}, {213, 100} },
+        { {333, 251}, {197, 220}, {239, 103}, {384, 100} },
+        { {542, 6}, {596, 221}, {518, 299}, {312, 236} },
+        { {596, 221}, {518, 299}, {312, 236}, {542, 6} }
+    };
+    EXPECT_EQ(rightCorners.size(), contours.size());
+
+    for (size_t i = 0; i < contours.size(); ++i) {
+        std::vector<Point> corners;
+        approxPolyN(contours[i], corners, 4, -1, true);
+        ASSERT_EQ(rightCorners[i], corners );
+    }
+}
+
+TEST_F(ApproxPolyN, accuracyFloat)
+{
+    vector<vector<Point2f>> rightCorners = {
+        { {72.f, 187.f}, {37.f, 176.f}, {42.f, 127.f}, {133.f, 64.f} },
+        { {168.f, 212.f}, {92.f, 192.f}, {131.f, 109.f}, {213.f, 100.f} },
+        { {72.f, 187.f}, {37.f, 176.f}, {42.f, 127.f}, {133.f, 64.f} },
+        { {384.f, 100.f}, {333.f, 251.f}, {197.f, 220.f}, {239.f, 103.f} },
+        { {168.f, 212.f}, {92.f, 192.f}, {131.f, 109.f}, {213.f, 100.f} },
+        { {333.f, 251.f}, {197.f, 220.f}, {239.f, 103.f}, {384.f, 100.f} },
+        { {542.f, 6.f}, {596.f, 221.f}, {518.f, 299.f}, {312.f, 236.f} },
+        { {596.f, 221.f}, {518.f, 299.f}, {312.f, 236.f}, {542.f, 6.f} }
+    };
+    EXPECT_EQ(rightCorners.size(), contours.size());
+
+    for (size_t i = 0; i < contours.size(); ++i) {
+        std::vector<Point2f> corners;
+        approxPolyN(contours[i], corners, 4, -1, true);
+        EXPECT_LT(cvtest::norm(rightCorners[i], corners, NORM_INF), .5f);
+    }
+}
+
+TEST_F(ApproxPolyN, bad_args)
+{
+    Mat contour(10, 1, CV_32FC2);
+    vector<vector<Point>> bad_contours;
+    vector<Point> corners;
+    ASSERT_ANY_THROW(approxPolyN(contour, corners, 0));
+    ASSERT_ANY_THROW(approxPolyN(contour, corners, 3, 0));
+    ASSERT_ANY_THROW(approxPolyN(bad_contours, corners, 4));
+}
+
 }} // namespace

From 11fde3bb892b07197cb1483bcf754f098a6251fc Mon Sep 17 00:00:00 2001
From: fengyuentau <yuantao.feng@opencv.org.cn>
Date: Wed, 10 Jul 2024 14:48:45 +0800
Subject: [PATCH 26/39] fix

---
 modules/core/include/opencv2/core/hal/intrin_math.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp
index 4f967cff1a..06a4e27080 100644
--- a/modules/core/include/opencv2/core/hal/intrin_math.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp
@@ -428,7 +428,7 @@ namespace CV__SIMD_NAMESPACE {
 //! @name Error Function
 //! @{
 
-    inline v_float32 v_erf(v_float32 v) {
+    inline v_float32 v_erf(const v_float32 &v) {
         const v_float32 coef0 = vx_setall_f32(0.3275911f),
                         coef1 = vx_setall_f32(1.061405429f),
                         coef2 = vx_setall_f32(-1.453152027f),

From 06b9db6a710f9e43ce91112b09d2915075a749ba Mon Sep 17 00:00:00 2001
From: Maksim Shabunin <maksim.shabunin@gmail.com>
Date: Wed, 10 Jul 2024 11:06:25 +0300
Subject: [PATCH 27/39] imgproc: reduce template sizes in templMatch test

---
 modules/imgproc/test/test_templmatch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/imgproc/test/test_templmatch.cpp b/modules/imgproc/test/test_templmatch.cpp
index 880e0d71e7..a10f44ce0f 100644
--- a/modules/imgproc/test/test_templmatch.cpp
+++ b/modules/imgproc/test/test_templmatch.cpp
@@ -318,7 +318,7 @@ TEST_P(matchTemplate_Modes, accuracy)
         SCOPED_TRACE(cv::format("iteration %d", ITER));
 
         const Size imgSize(rng.uniform(128, 320), rng.uniform(128, 240));
-        const Size templSize(rng.uniform(1, 100), rng.uniform(1, 100));
+        const Size templSize(rng.uniform(1, 30), rng.uniform(1, 30));
         Mat img(imgSize, data_type, Scalar::all(0));
         Mat templ(templSize, data_type, Scalar::all(0));
         cvtest::randUni(rng, img, Scalar::all(0), Scalar::all(255));

From 35ca2f78d66d8187e35430235df1d6c159302597 Mon Sep 17 00:00:00 2001
From: Aliaksei Urbanski <aliaksei.urbanski@gmail.com>
Date: Wed, 10 Jul 2024 12:39:30 +0300
Subject: [PATCH 28/39] Merge pull request #25880 from Jamim:fix/cuda-no-fp16

Fix CUDA for old GPUs without FP16 support #25880

Fixes #21461

~This is a build-time solution that reflects https://github.com/opencv/opencv/blob/4.10.0/modules/dnn/src/cuda4dnn/init.hpp#L68-L82.~
~We shouldn't add an invalid target while building with `CUDA_ARCH_BIN` < 53.~
_(please see [this discussion](https://github.com/opencv/opencv/pull/25880#discussion_r1668074505))_

This is a run-time solution that basically reverts [these lines](https://github.com/opencv/opencv/commit/d0fe6ad10967fd2b007a4cf83b00d6f8446deb42#diff-757c5ab6ddf2f99cdd09f851e3cf17abff203aff4107d908c7ad3d0466f39604L245-R245).

I've debugged these changes, [coupled with other fixes](https://github.com/gentoo/gentoo/pull/37479), on [Gentoo Linux](https://www.gentoo.org/) and [related tests passed](https://github.com/user-attachments/files/16135391/opencv-4.10.0.20240708-224733.log.gz) on my laptop with `GeForce GTX 960M`.

Alternative solution:
  - #21462

_Best regards!_

### Pull Request Readiness Checklist

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] `n/a` There is accuracy test, performance test and test data in opencv_extra repository, if applicable
- [ ] `n/a` The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/src/cuda4dnn/init.hpp          | 22 +++++++++---------
 modules/dnn/src/net_impl_backend.cpp       | 14 ++++++++++++
 modules/dnn/src/registry.cpp               | 26 ++++++++++++++++++++--
 modules/dnn/test/test_common.hpp           |  2 +-
 modules/dnn/test/test_onnx_conformance.cpp |  2 +-
 5 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/modules/dnn/src/cuda4dnn/init.hpp b/modules/dnn/src/cuda4dnn/init.hpp
index a4177fce4f..e1f0a71d2f 100644
--- a/modules/dnn/src/cuda4dnn/init.hpp
+++ b/modules/dnn/src/cuda4dnn/init.hpp
@@ -15,7 +15,7 @@
 
 namespace cv { namespace dnn { namespace cuda4dnn {
 
-    void checkVersions()
+    inline void checkVersions()
     {
         // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model
         // cuDNN API Compatibility
@@ -44,21 +44,23 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         }
     }
 
-    int getDeviceCount()
+    inline int getDeviceCount()
     {
         return cuda::getCudaEnabledDeviceCount();
     }
 
-    int getDevice()
+    inline int getDevice()
     {
         int device_id = -1;
         CUDA4DNN_CHECK_CUDA(cudaGetDevice(&device_id));
         return device_id;
     }
 
-    bool isDeviceCompatible()
+    inline bool isDeviceCompatible(int device_id = -1)
     {
-        int device_id = getDevice();
+        if (device_id < 0)
+            device_id = getDevice();
+
         if (device_id < 0)
             return false;
 
@@ -76,9 +78,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         return false;
     }
 
-    bool doesDeviceSupportFP16()
+    inline bool doesDeviceSupportFP16(int device_id = -1)
     {
-        int device_id = getDevice();
+        if (device_id < 0)
+            device_id = getDevice();
+
         if (device_id < 0)
             return false;
 
@@ -87,9 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         CUDA4DNN_CHECK_CUDA(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id));
 
         int version = major * 10 + minor;
-        if (version < 53)
-            return false;
-        return true;
+        return (version >= 53);
     }
 
 }}} /* namespace cv::dnn::cuda4dnn */
diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp
index b53908f8ec..5fc55adab1 100644
--- a/modules/dnn/src/net_impl_backend.cpp
+++ b/modules/dnn/src/net_impl_backend.cpp
@@ -10,6 +10,10 @@
 #include "backend.hpp"
 #include "factory.hpp"
 
+#ifdef HAVE_CUDA
+#include "cuda4dnn/init.hpp"
+#endif
+
 namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
@@ -242,6 +246,16 @@ void Net::Impl::setPreferableTarget(int targetId)
 #endif
         }
 
+        if (IS_DNN_CUDA_TARGET(targetId))
+        {
+            preferableTarget = DNN_TARGET_CPU;
+#ifdef HAVE_CUDA
+            if (cuda4dnn::doesDeviceSupportFP16() && targetId == DNN_TARGET_CUDA_FP16)
+                preferableTarget = DNN_TARGET_CUDA_FP16;
+            else
+                preferableTarget = DNN_TARGET_CUDA;
+#endif
+        }
 #if !defined(__arm64__) || !__arm64__
         if (targetId == DNN_TARGET_CPU_FP16)
         {
diff --git a/modules/dnn/src/registry.cpp b/modules/dnn/src/registry.cpp
index 40630a93e4..d20658c3f6 100644
--- a/modules/dnn/src/registry.cpp
+++ b/modules/dnn/src/registry.cpp
@@ -18,6 +18,10 @@
 #include "backend.hpp"
 #include "factory.hpp"
 
+#ifdef HAVE_CUDA
+#include "cuda4dnn/init.hpp"
+#endif
+
 namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
@@ -118,10 +122,28 @@ private:
 #endif
 
 #ifdef HAVE_CUDA
-        if (haveCUDA())
+        cuda4dnn::checkVersions();
+
+        bool hasCudaCompatible = false;
+        bool hasCudaFP16 = false;
+        for (int i = 0; i < cuda4dnn::getDeviceCount(); i++)
+        {
+            if (cuda4dnn::isDeviceCompatible(i))
+            {
+                hasCudaCompatible = true;
+                if (cuda4dnn::doesDeviceSupportFP16(i))
+                {
+                    hasCudaFP16 = true;
+                    break; // we already have all we need here
+                }
+            }
+        }
+
+        if (hasCudaCompatible)
         {
             backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
-            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
+            if (hasCudaFP16)
+                backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
         }
 #endif
 
diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp
index 435f481566..54ea17abea 100644
--- a/modules/dnn/test/test_common.hpp
+++ b/modules/dnn/test/test_common.hpp
@@ -211,7 +211,7 @@ public:
             if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
             {
                 hasFallbacks = true;
-                std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
+                std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
             }
         }
         if (hasFallbacks && raiseError)
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 8af34695dd..47e6f22fce 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -1016,7 +1016,7 @@ public:
             if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
             {
                 hasFallbacks = true;
-                std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
+                std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
             }
         }
         return hasFallbacks;

From 63b9cbc2d0e2d0da67f9cbcdf2677b70c0a09fa3 Mon Sep 17 00:00:00 2001
From: Suleyman TURKMEN <sturkmen@hotmail.com>
Date: Wed, 10 Jul 2024 15:24:37 +0300
Subject: [PATCH 29/39] Update imgcodecs.hpp

---
 .../imgcodecs/include/opencv2/imgcodecs.hpp   | 36 ++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index 7441634a25..735b24db91 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -412,27 +412,47 @@ CV_EXPORTS_W bool imencode( const String& ext, InputArray img,
                             CV_OUT std::vector<uchar>& buf,
                             const std::vector<int>& params = std::vector<int>());
 
-/** @brief Returns true if the specified image can be decoded by OpenCV
+/** @brief Checks if the specified image file can be decoded by OpenCV.
 
-@param filename File name of the image
+The function haveImageReader checks if OpenCV is capable of reading the specified file.
+This can be useful for verifying support for a given image format before attempting to load an image.
+
+@param filename The name of the file to be checked.
+@return true if an image reader for the specified file is available and the file can be opened, false otherwise.
+
+@note The function checks the availability of image codecs that are either built into OpenCV or dynamically loaded.
+It does not check for the actual existence of the file but rather the ability to read the specified file type.
+If the file cannot be opened or the format is unsupported, the function will return false.
+
+@sa cv::haveImageWriter, cv::imread, cv::imdecode
 */
 CV_EXPORTS_W bool haveImageReader( const String& filename );
 
-/** @brief Returns true if an image with the specified filename can be encoded by OpenCV
+/** @brief Checks if the specified image file or specified file extension can be encoded by OpenCV.
 
- @param filename File name of the image
- */
+The function haveImageWriter checks if OpenCV is capable of writing images with the specified file extension.
+This can be useful for verifying support for a given image format before attempting to save an image.
+
+@param filename The name of the file or the file extension (e.g., ".jpg", ".png").
+It is recommended to provide the file extension rather than the full file name.
+@return true if an image writer for the specified extension is available, false otherwise.
+
+@note The function checks the availability of image codecs that are either built into OpenCV or dynamically loaded.
+It does not check for the actual existence of the file but rather the ability to write files of the given type.
+
+@sa cv::haveImageReader, cv::imwrite, cv::imencode
+*/
 CV_EXPORTS_W bool haveImageWriter( const String& filename );
 
-/** @brief To read Multi Page images on demand
+/** @brief To read multi-page images on demand
 
-The ImageCollection class provides iterator API to read multi page images on demand. Create iterator
+The ImageCollection class provides iterator API to read multi-page images on demand. Create iterator
 to the collection of the images and iterate over the collection. Decode the necessary page with operator*.
 
 The performance of page decoding is O(1) if collection is increment sequentially. If the user wants to access random page,
 then the time Complexity is O(n) because the collection has to be reinitialized every time in order to go to the correct page.
 However, the intermediate pages are not decoded during the process, so typically it's quite fast.
-This is required because multipage codecs does not support going backwards.
+This is required because multi-page codecs does not support going backwards.
 After decoding the one page, it is stored inside the collection cache. Hence, trying to get Mat object from already decoded page is O(1).
 If you need memory, you can use .releaseCache() method to release cached index.
 The space complexity is O(n) if all pages are decoded into memory. The user is able to decode and release images on demand.

From a9d8d45df435dea202a8e42d4a3f686fae8755a2 Mon Sep 17 00:00:00 2001
From: richard28039 <richard280390@gmail.com>
Date: Thu, 11 Jul 2024 03:15:52 +0800
Subject: [PATCH 30/39] fix the mistake

---
 doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
index ce95234f88..8406b4746c 100644
--- a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
+++ b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@@ -222,7 +222,7 @@ cd ..
 export OPENCV_TEST_DATA_PATH=$(pwd)
 cd <build directory of OpenCV>
 
-./bin/example_dnn_yolo_detector --model=onnx/models/yolov8n.onnx --yolo=yolov10 --width=640 --height=480  --scale=0.003921568627 --padvalue=114
+./bin/example_dnn_yolo_detector --model=onnx/models/yolov10s.onnx --yolo=yolov10 --width=640 --height=480  --scale=0.003921568627 --padvalue=114
 @endcode
 
 This will run `YOLOv10` detector on first camera found on your system. If you want to run it on a image/video file, you can use `--input` option to specify the path to the file.

From 78195bc3dfe20b96e721ae8b32d0aa3491755e78 Mon Sep 17 00:00:00 2001
From: lamiayous <124199862+lamiayous@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:38:43 +0100
Subject: [PATCH 31/39] Merge pull request #25817 from
 lamiayous:ly/extend_onnxrt_gapi_backend_handle_i32_i64_type

Handling I32/I64 data types in G-API ONNX back-end #25817

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../gapi/src/backends/onnx/gonnxbackend.cpp   | 48 +++++++++++++++----
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp
index e465a1ccc7..0d9a16a7bd 100644
--- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp
+++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp
@@ -123,7 +123,7 @@ class ONNXCompiled {
     std::vector<cv::Mat> out_data;
 
     void Run(const std::vector<cv::Mat>& ins,
-             const std::vector<cv::Mat>& outs);
+             std::vector<cv::Mat>& outs);
 
     std::vector<std::string> in_names_without_const;
 public:
@@ -322,22 +322,20 @@ inline std::vector<int64_t> toORT(const cv::MatSize &sz) {
 inline void preprocess(const cv::Mat& src,
                        const cv::gimpl::onnx::TensorInfo& ti,
                              cv::Mat& dst) {
-    GAPI_Assert(src.depth() == CV_32F || src.depth() == CV_8U);
     // CNN input type
     const auto type = toCV(ti.type);
-    if (src.depth() == CV_32F) {
+    if (src.depth() != CV_8U) {
         // Just pass the tensor as-is.
         // No layout or dimension transformations done here!
         // TODO: This needs to be aligned across all NN backends.
-        GAPI_Assert(type == CV_32F && "Only 32F model input is supported for 32F input data");
         const auto tensor_dims = toORT(src.size);
         if (tensor_dims.size() == ti.dims.size()) {
             for (size_t i = 0; i < ti.dims.size(); ++i) {
                 GAPI_Assert((ti.dims[i] == -1 || ti.dims[i] == tensor_dims[i]) &&
-                            "32F tensor dimensions should match with all non-dynamic NN input dimensions");
+                            "Non-U8 tensor dimensions should match with all non-dynamic NN input dimensions");
             }
         } else {
-            GAPI_Error("32F tensor size should match with NN input");
+            GAPI_Error("Non-U8 tensor size should match with NN input");
         }
 
         dst = src;
@@ -471,6 +469,25 @@ inline Ort::Value createTensor(const Ort::MemoryInfo& memory_info,
         return createTensor<float>(memory_info, tensor_params, data);
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
         return createTensor<int32_t>(memory_info, tensor_params, data);
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:{
+        // cv::Mat does not support int64 data directly.
+        // Following steps are applied to create an ONNX tensor from cv::Mat data:
+        // - First create a new ONNX tensor 'i64_tensor' with data type int64_t using the default allocator
+        // - Next retrieve a pointer to the mutable data buffer of 'i64_tensor'
+        // - Convert the data from int32 (see toCV function) to int64 and deep copy it into 'i64_tensor'
+        auto ort_dims = toORT(data.size);
+
+        Ort::AllocatorWithDefaultOptions allocator;
+        Ort::Value i64_tensor = Ort::Value::CreateTensor<int64_t>(allocator,
+                                                                  ort_dims.data(),
+                                                                  ort_dims.size());
+        int64_t* tensor_data = i64_tensor.GetTensorMutableData<int64_t>();
+
+        cv::gimpl::convertInt32ToInt64(data.ptr<int>(),
+                                       tensor_data,
+                                       data.total());
+        return i64_tensor;
+    }
     default:
         GAPI_Error("ONNX. Unsupported data type");
     }
@@ -747,9 +764,11 @@ ONNXCompiled::ONNXCompiled(const gapi::onnx::detail::ParamDesc &pp)
                             in_tensor_info.end(),
                             [](const cv::gimpl::onnx::TensorInfo &p) {
                                 return p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT
-                                    || p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8;
+                                    || p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8
+                                    || p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32
+                                    || p.type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
                             })
-                && "Only FP32 and U8 inputs for NN are supported");
+                && "Only FP32, INT32, INT64 and U8 inputs for NN are supported");
 
     // Put mean and std in appropriate tensor params
     if (!params.mean.empty() || !params.stdev.empty()) {
@@ -864,7 +883,7 @@ cv::Mat ONNXCompiled::allocOutput(int i) const {
 }
 
 void ONNXCompiled::Run(const std::vector<cv::Mat>& ins,
-                       const std::vector<cv::Mat>& outs) {
+                       std::vector<cv::Mat>& outs) {
     std::vector<Ort::Value> in_tensors, out_tensors;
 
     // Layer names order for run
@@ -909,6 +928,17 @@ void ONNXCompiled::Run(const std::vector<cv::Mat>& ins,
                          out_run_names.data(),
                          &out_tensors.front(),
                          params.output_names.size());
+        if (out_tensor_info[0].type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
+            // cv::Mat does not support int64 output data.
+            // Conversion from int 64 to int 32 is carried in the copyFromONNX function
+            // The output is written to out_mat
+            for (auto &&iter : ade::util::zip(ade::util::toRange(out_tensors),
+                                              ade::util::toRange(outs))) {
+                auto &out_tensor = std::get<0>(iter);
+                auto &out_mat = std::get<1>(iter);
+                copyFromONNX(out_tensor, out_mat);
+            }
+        }
     } else {
         // Hard path - run session & user-defined post-processing
         // NOTE: use another list of output names here

From 3ff97c5580dec29d6f9946bc3eabe41ab6f8f93c Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Fri, 12 Jul 2024 13:41:17 +0200
Subject: [PATCH 32/39] Merge pull request #25899 from vrabaud:move_no_except

Mark cv::Mat(Mat&&) as noexcept #25899

This fixes https://github.com/opencv/opencv/issues/25065

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 modules/core/include/opencv2/core/base.hpp | 18 ++++++++++++++++++
 modules/core/include/opencv2/core/mat.hpp  |  2 +-
 modules/core/src/matrix.cpp                |  2 +-
 modules/core/src/system.cpp                |  6 ++++++
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index cc4cc0ddd2..d237d57837 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -297,6 +297,21 @@ It is possible to alternate error processing by using redirectError().
  */
 CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
 
+/*! @brief Signals an error and terminate application.
+
+By default the function prints information about the error to stderr, then it terminates application
+with std::terminate. The function is designed for invariants check in functions and methods with
+noexcept attribute.
+@param _code - error code (Error::Code)
+@param _err - error description
+@param _func - function name. Available only when the compiler supports getting it
+@param _file - source file name where the error has occurred
+@param _line - line number in the source file where the error has occurred
+@see CV_AssertTerminate
+ */
+CV_EXPORTS CV_NORETURN void terminate(int _code, const String& _err, const char* _func, const char* _file, int _line) CV_NOEXCEPT;
+
+
 #ifdef CV_STATIC_ANALYSIS
 
 // In practice, some macro are not processed correctly (noreturn is not detected).
@@ -338,8 +353,11 @@ for example:
 The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
 raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
 configurations while CV_DbgAssert is only retained in the Debug configuration.
+CV_AssertTerminate is analog of CV_Assert for invariants check in functions with noexcept attribute.
+It does not throw exception, but terminates the application.
 */
 #define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
+#define CV_AssertTerminate( expr ) do { if(!!(expr)) ; else cv::terminate( #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
 
 #endif // CV_STATIC_ANALYSIS
 
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index 2bfb0966c2..7af3c9cfc4 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -2119,7 +2119,7 @@ public:
     /** @overload */
     template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
 
-    Mat(Mat&& m);
+    Mat(Mat&& m) CV_NOEXCEPT;
     Mat& operator = (Mat&& m);
 
     enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index 0701542dfd..1b11e12145 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -595,7 +595,7 @@ size_t Mat::total(int startDim, int endDim) const
 }
 
 
-Mat::Mat(Mat&& m)
+Mat::Mat(Mat&& m) CV_NOEXCEPT
     : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
       datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
       u(m.u), size(&rows)
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index c02944079e..8227175b6a 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -1316,6 +1316,12 @@ redirectError( ErrorCallback errCallback, void* userdata, void** prevUserdata)
     return prevCallback;
 }
 
+void terminate(int _code, const String& _err, const char* _func, const char* _file, int _line) CV_NOEXCEPT
+{
+    dumpException(cv::Exception(_code, _err, _func, _file, _line));
+    std::terminate();
+}
+
 }
 
 CV_IMPL int cvCheckHardwareSupport(int feature)

From 15783d65981d8978597c6b60e830e21e964cbdf9 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com>
Date: Fri, 12 Jul 2024 15:03:33 +0300
Subject: [PATCH 33/39] Merge pull request #25792 from
 asmorkalov:as/HAL_fast_GaussianBlur

Added flag to GaussianBlur for faster but not bit-exact implementation #25792

Rationale:
Current implementation of GaussianBlur is almost always bit-exact. It helps to get predictable results according platforms, but prohibits most of approximations and optimization tricks.

The patch converts `borderType` parameter to more generic `flags` and introduces `GAUSS_ALLOW_APPROXIMATIONS` flag to allow not bit-exact implementation. With the flag IPP and generic HAL implementation are called first. The flag naming and location is a subject for discussion.

Replaces https://github.com/opencv/opencv/pull/22073
Possibly related issue: https://github.com/opencv/opencv/issues/24135

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 CMakeLists.txt                                |  6 +-
 .../config_reference.markdown                 |  1 +
 modules/core/CMakeLists.txt                   |  4 ++
 modules/core/include/opencv2/core.hpp         | 12 ++++
 modules/core/src/system.cpp                   |  9 +++
 modules/imgproc/include/opencv2/imgproc.hpp   |  4 +-
 modules/imgproc/src/smooth.dispatch.cpp       | 58 ++++++++++++++++---
 modules/imgproc/test/test_smooth_bitexact.cpp | 56 +++++++++++++++++-
 modules/python/test/test_misc.py              |  4 ++
 modules/ts/src/ts.cpp                         |  1 +
 10 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30b205ecd8..c196d0f2be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1258,7 +1258,11 @@ if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config")
 else()
   status("    Configuration:"  ${CMAKE_BUILD_TYPE})
 endif()
-
+if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
+  status("    Algorithm Hint:"  ${OPENCV_ALGO_HINT_DEFAULT})
+else()
+  status("    Algorithm Hint:" " ALGO_ACCURATE")
+endif()
 
 # ========================= CPU code generation mode =========================
 status("")
diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown
index 7ced9a2536..e43b8793e5 100644
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@@ -217,6 +217,7 @@ Following options can be used to produce special builds with instrumentation or
 | `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation.  |
 | `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). |
 | `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. |
+| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APROX`. Dangerous! The option  changes behaviour globally and may affect accuracy of many algorithms. |
 
 @see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html)
 @see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing))
diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt
index 16f32c994a..ea1100c954 100644
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -186,6 +186,10 @@ if(OPENCV_SEMIHOSTING)
   ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING")
 endif(OPENCV_SEMIHOSTING)
 
+if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
+  ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_ALGO_HINT_DEFAULT=${OPENCV_ALGO_HINT_DEFAULT}")
+endif(DEFINED OPENCV_ALGO_HINT_DEFAULT)
+
 if(HAVE_HPX)
   ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
 endif()
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index b58a3a6ccb..4bfb95fede 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -150,6 +150,18 @@ It is possible to alternate error processing by using #redirectError().
  */
 CV_EXPORTS CV_NORETURN void error(const Exception& exc);
 
+/*! @brief Flags that allow to midify some functions behavior. Used as set of flags.
+*/
+enum AlgorithmHint {
+    ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
+    ALGO_ACCURATE = 1, //!< Use generic portable implementation
+    ALGO_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and result depends on a platform
+};
+
+/*! @brief Returns ImplementationHint selected by default, a.k.a. `IMPL_DEFAULT` defined during OpenCV compilation.
+ */
+CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
+
 enum SortFlags { SORT_EVERY_ROW    = 0, //!< each matrix row is sorted independently
                  SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
                                         //!< independently; this flag and the previous one are
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 8227175b6a..eccef84c92 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -46,6 +46,7 @@
 #include <iostream>
 #include <ostream>
 
+#include <opencv2/core.hpp>
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/trace.private.hpp>
 
@@ -2888,6 +2889,14 @@ bool restoreFPDenormalsState(const FPDenormalsModeState& state)
 
 }  // namespace details
 
+AlgorithmHint getDefaultAlgorithmHint()
+{
+#ifdef OPENCV_ALGO_HINT_DEFAULT
+    return OPENCV_ALGO_HINT_DEFAULT;
+#else
+    return ALGO_ACCURATE;
+#endif
+};
 
 } // namespace cv
 
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index 2f3c6f344f..53ff5ea6bd 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -1536,12 +1536,14 @@ respectively (see #getGaussianKernel for details); to fully control the result r
 possible future modifications of all this semantics, it is recommended to specify all of ksize,
 sigmaX, and sigmaY.
 @param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@param hint Implementation modfication flags. See #AlgorithmHint
 
 @sa  sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
  */
 CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
                                 double sigmaX, double sigmaY = 0,
-                                int borderType = BORDER_DEFAULT );
+                                int borderType = BORDER_DEFAULT,
+                                AlgorithmHint hint = cv::ALGO_DEFAULT );
 
 /** @brief Applies the bilateral filter to an image.
 
diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp
index d0f50a73bb..6bc989e520 100644
--- a/modules/imgproc/src/smooth.dispatch.cpp
+++ b/modules/imgproc/src/smooth.dispatch.cpp
@@ -468,7 +468,7 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
 
 #endif
 
-#if defined ENABLE_IPP_GAUSSIAN_BLUR  // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
+#ifdef ENABLE_IPP_GAUSSIAN_BLUR  // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
 
 #define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
 #define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
@@ -526,14 +526,14 @@ private:
 
 #endif
 
-static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
+static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize,
                    double sigma1, double sigma2, int borderType )
 {
 #ifdef HAVE_IPP_IW
     CV_INSTRUMENT_REGION_IPP();
 
 #if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
-    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
     return false; // bug on ia32
 #else
     if(sigma1 != sigma2)
@@ -548,8 +548,6 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
     // Acquire data and begin processing
     try
     {
-        Mat src = _src.getMat();
-        Mat dst = _dst.getMat();
         ::ipp::IwiImage       iwSrc      = ippiGetImage(src);
         ::ipp::IwiImage       iwDst      = ippiGetImage(dst);
         ::ipp::IwiBorderSize  borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
@@ -589,7 +587,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
     return true;
 #endif
 #else
-    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
     return false;
 #endif
 }
@@ -610,10 +608,13 @@ static bool validateGaussianBlurKernel(std::vector<T>& kernel)
 
 void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                   double sigma1, double sigma2,
-                  int borderType)
+                  int borderType, AlgorithmHint hint)
 {
     CV_INSTRUMENT_REGION();
 
+    if (hint == cv::ALGO_DEFAULT)
+        hint = cv::getDefaultAlgorithmHint();
+
     CV_Assert(!_src.empty());
 
     int type = _src.type();
@@ -693,7 +694,27 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                     src2.locateROI( wsz, ofs );
 
                 CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
-                         ofs.x, ofs.y, wsz.width - src2.cols - ofs.x,  wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
+                         ofs.x, ofs.y, wsz.width - src2.cols - ofs.x,  wsz.height - src2.rows - ofs.y, ksize.width,
+                         borderType & ~BORDER_ISOLATED);
+            }
+
+            if (hint == ALGO_APPROX)
+            {
+                Point ofs;
+                Size wsz(src.cols, src.rows);
+                if(!(borderType & BORDER_ISOLATED))
+                    src.locateROI( wsz, ofs );
+
+                CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
+                        ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
+                        sigma1, sigma2, borderType & ~BORDER_ISOLATED);
+
+#ifdef ENABLE_IPP_GAUSSIAN_BLUR
+                // IPP is not bit-exact to OpenCV implementation
+                CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
+#endif
+                CV_OVX_RUN(true,
+                        openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
             }
 
             CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
@@ -747,6 +768,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                          ofs.x, ofs.y, wsz.width - src2.cols - ofs.x,  wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
             }
 
+            if (hint == ALGO_APPROX)
+            {
+                Point ofs;
+                Size wsz(src.cols, src.rows);
+                if(!(borderType & BORDER_ISOLATED))
+                    src.locateROI( wsz, ofs );
+
+                CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
+                        ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
+                        sigma1, sigma2, borderType & ~BORDER_ISOLATED);
+
+#ifdef ENABLE_IPP_GAUSSIAN_BLUR
+                // IPP is not bit-exact to OpenCV implementation
+                CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
+#endif
+                CV_OVX_RUN(true,
+                        openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
+            }
+
             CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
                 CV_CPU_DISPATCH_MODES_ALL);
 
@@ -772,7 +812,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
 
     CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
              ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
-             sigma1, sigma2, borderType&~BORDER_ISOLATED);
+             sigma1, sigma2, borderType & ~BORDER_ISOLATED);
 
     CV_OVX_RUN(true,
                openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
diff --git a/modules/imgproc/test/test_smooth_bitexact.cpp b/modules/imgproc/test/test_smooth_bitexact.cpp
index d4ae2af833..2d1f7b5a4e 100644
--- a/modules/imgproc/test/test_smooth_bitexact.cpp
+++ b/modules/imgproc/test/test_smooth_bitexact.cpp
@@ -244,7 +244,7 @@ static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N
 TEST(GaussianBlur_Bitexact, regression_9863)
 {
     Mat src8u = imread(cvtest::findDataFile("shared/lena.png"));
-     Mat src32f; src8u.convertTo(src32f, CV_32F);
+    Mat src32f; src8u.convertTo(src32f, CV_32F);
 
     checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30);
 }
@@ -260,4 +260,58 @@ TEST(GaussianBlur_Bitexact, overflow_20792)
     EXPECT_GT(count, nintyPercent);
 }
 
+CV_ENUM(GaussInputType, CV_8U, CV_16S);
+CV_ENUM(GaussBorder, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101);
+
+struct GaussianBlurVsBitexact: public testing::TestWithParam<tuple<GaussInputType, int, double, GaussBorder>>
+{
+    virtual void SetUp()
+    {
+        orig = imread(findDataFile("shared/lena.png"));
+        EXPECT_FALSE(orig.empty()) << "Cannot find test image shared/lena.png";
+    }
+
+    Mat orig;
+};
+
+// NOTE: The test was designed for IPP (-DOPENCV_IPP_GAUSSIAN_BLUR=ON)
+// Should be extended after new HAL integration
+TEST_P(GaussianBlurVsBitexact, approx)
+{
+    auto testParams = GetParam();
+    int dtype = get<0>(testParams);
+    int ksize = get<1>(testParams);
+    double sigma = get<2>(testParams);
+    int border = get<3>(testParams);
+
+    Mat src;
+    orig.convertTo(src, dtype);
+
+    cv::Mat gt;
+    GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_ACCURATE);
+
+    cv::Mat dst;
+    GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_APPROX);
+
+    cv::Mat diff;
+    cv::absdiff(dst, gt, diff);
+    cv::Mat flatten_diff = diff.reshape(1, diff.rows);
+
+    int nz = countNonZero(flatten_diff);
+    EXPECT_LE(nz, 0.06*src.total()); // Less 6% of different pixels
+
+    double min_val, max_val;
+    minMaxLoc(flatten_diff, &min_val, &max_val);
+    EXPECT_LE(max_val, 2); // expectes results floating +-1
+}
+
+INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact,
+    testing::Combine(
+        GaussInputType::all(),
+        testing::Values(3, 5, 7),
+        testing::Values(0.75, 1.25),
+        GaussBorder::all()
+    )
+);
+
 }} // namespace
diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py
index 08ab04d53d..ac2b02f875 100644
--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@@ -987,6 +987,10 @@ class SamplesFindFile(NewOpenCVTests):
         except cv.error as _e:
             pass
 
+class AlgorithmImplHit(NewOpenCVTests):
+    def test_callable(self):
+        res = cv.getDefaultAlgorithmHint()
+        self.assertTrue(res is not None)
 
 if __name__ == '__main__':
     NewOpenCVTests.bootstrap()
diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp
index fb60a18ff1..9fe5cb3937 100644
--- a/modules/ts/src/ts.cpp
+++ b/modules/ts/src/ts.cpp
@@ -1126,6 +1126,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
     recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n"));
     recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
     recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
+    recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n"));
     const char* parallelFramework = cv::currentParallelFramework();
     if (parallelFramework)
     {

From e906f0f3b343a1db6ec7f943389e4d5c2443e093 Mon Sep 17 00:00:00 2001
From: Kumataro <Kumataro@users.noreply.github.com>
Date: Sat, 13 Jul 2024 11:16:45 +0900
Subject: [PATCH 34/39] core: hal: disable _tzcnt_u32 for ARM64EC

---
 modules/core/include/opencv2/core/hal/intrin.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index 9a95b3a67b..a57eb5e799 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -64,7 +64,7 @@
 namespace {
 inline unsigned int trailingZeros32(unsigned int value) {
 #if defined(_MSC_VER)
-#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
+#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
     unsigned long index = 0;
     _BitScanForward(&index, value);
     return (unsigned int)index;

From 04f9e3cd4f34f2e3eb5459bdfdfbd59a21406c91 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Mon, 15 Jul 2024 12:59:12 +0300
Subject: [PATCH 35/39] Restored removed test_round_pair_f64 test afetr PR
 24941.

---
 modules/core/test/test_intrin_utils.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 742136f84c..ad8faf7bfb 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -2246,6 +2246,7 @@ void test_hal_intrin_float64()
         .test_mask()
         .test_unpack()
         .test_float_math()
+        .test_round_pair_f64()
         .test_float_cvt32()
         .test_reverse()
         .test_extract<0>().test_extract<1>()

From a6b8ea892bdd619f658c0380a317c11ea206ffdc Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Mon, 15 Jul 2024 11:28:23 +0300
Subject: [PATCH 36/39] Post-merge fixes for algorithm hint API.

---
 CMakeLists.txt                                   |  2 +-
 .../config_reference/config_reference.markdown   |  2 +-
 modules/core/include/opencv2/core.hpp            | 12 ------------
 modules/core/include/opencv2/core/utility.hpp    | 12 ++++++++++++
 modules/core/src/system.cpp                      |  3 +--
 modules/imgproc/include/opencv2/imgproc.hpp      |  2 +-
 modules/imgproc/src/smooth.dispatch.cpp          |  6 +++---
 modules/imgproc/test/test_smooth_bitexact.cpp    | 16 ++++------------
 8 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c196d0f2be..e1dd4e5cd7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1261,7 +1261,7 @@ endif()
 if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
   status("    Algorithm Hint:"  ${OPENCV_ALGO_HINT_DEFAULT})
 else()
-  status("    Algorithm Hint:" " ALGO_ACCURATE")
+  status("    Algorithm Hint:" " ALGO_HINT_ACCURATE")
 endif()
 
 # ========================= CPU code generation mode =========================
diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown
index e43b8793e5..ef00ca5356 100644
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@@ -217,7 +217,7 @@ Following options can be used to produce special builds with instrumentation or
 | `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation.  |
 | `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). |
 | `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. |
-| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APROX`. Dangerous! The option  changes behaviour globally and may affect accuracy of many algorithms. |
+| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_HINT_ACCURATE` or `ALGO_HINT_APROX`. Dangerous! The option  changes behaviour globally and may affect accuracy of many algorithms. |
 
 @see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html)
 @see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing))
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 4bfb95fede..b58a3a6ccb 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -150,18 +150,6 @@ It is possible to alternate error processing by using #redirectError().
  */
 CV_EXPORTS CV_NORETURN void error(const Exception& exc);
 
-/*! @brief Flags that allow to midify some functions behavior. Used as set of flags.
-*/
-enum AlgorithmHint {
-    ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
-    ALGO_ACCURATE = 1, //!< Use generic portable implementation
-    ALGO_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and result depends on a platform
-};
-
-/*! @brief Returns ImplementationHint selected by default, a.k.a. `IMPL_DEFAULT` defined during OpenCV compilation.
- */
-CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
-
 enum SortFlags { SORT_EVERY_ROW    = 0, //!< each matrix row is sorted independently
                  SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
                                         //!< independently; this flag and the previous one are
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index e491352bcf..5eed688c83 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -544,6 +544,18 @@ bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4)
     return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4));
 }
 
+/*! @brief Flags that allow to midify some functions behavior. Used as set of flags.
+*/
+enum AlgorithmHint {
+    ALGO_HINT_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
+    ALGO_HINT_ACCURATE = 1, //!< Use generic portable implementation
+    ALGO_HINT_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and result depends on a platform
+};
+
+/*! @brief Returns AlgorithmHint defined during OpenCV compilation. Defines #ALGO_HINT_DEFAULT behavior.
+ */
+CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
+
 /** @brief Enables or disables the optimized code.
 
 The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2,
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index eccef84c92..8d72d64f87 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -46,7 +46,6 @@
 #include <iostream>
 #include <ostream>
 
-#include <opencv2/core.hpp>
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/trace.private.hpp>
 
@@ -2894,7 +2893,7 @@ AlgorithmHint getDefaultAlgorithmHint()
 #ifdef OPENCV_ALGO_HINT_DEFAULT
     return OPENCV_ALGO_HINT_DEFAULT;
 #else
-    return ALGO_ACCURATE;
+    return ALGO_HINT_ACCURATE;
 #endif
 };
 
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index 53ff5ea6bd..4456b3a88c 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -1543,7 +1543,7 @@ sigmaX, and sigmaY.
 CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
                                 double sigmaX, double sigmaY = 0,
                                 int borderType = BORDER_DEFAULT,
-                                AlgorithmHint hint = cv::ALGO_DEFAULT );
+                                AlgorithmHint hint = cv::ALGO_HINT_DEFAULT );
 
 /** @brief Applies the bilateral filter to an image.
 
diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp
index 6bc989e520..f7dafbd956 100644
--- a/modules/imgproc/src/smooth.dispatch.cpp
+++ b/modules/imgproc/src/smooth.dispatch.cpp
@@ -612,7 +612,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
 {
     CV_INSTRUMENT_REGION();
 
-    if (hint == cv::ALGO_DEFAULT)
+    if (hint == cv::ALGO_HINT_DEFAULT)
         hint = cv::getDefaultAlgorithmHint();
 
     CV_Assert(!_src.empty());
@@ -698,7 +698,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                          borderType & ~BORDER_ISOLATED);
             }
 
-            if (hint == ALGO_APPROX)
+            if (hint == ALGO_HINT_APPROX)
             {
                 Point ofs;
                 Size wsz(src.cols, src.rows);
@@ -768,7 +768,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                          ofs.x, ofs.y, wsz.width - src2.cols - ofs.x,  wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
             }
 
-            if (hint == ALGO_APPROX)
+            if (hint == ALGO_HINT_APPROX)
             {
                 Point ofs;
                 Size wsz(src.cols, src.rows);
diff --git a/modules/imgproc/test/test_smooth_bitexact.cpp b/modules/imgproc/test/test_smooth_bitexact.cpp
index 2d1f7b5a4e..4546296877 100644
--- a/modules/imgproc/test/test_smooth_bitexact.cpp
+++ b/modules/imgproc/test/test_smooth_bitexact.cpp
@@ -288,21 +288,13 @@ TEST_P(GaussianBlurVsBitexact, approx)
     orig.convertTo(src, dtype);
 
     cv::Mat gt;
-    GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_ACCURATE);
+    GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_HINT_ACCURATE);
 
     cv::Mat dst;
-    GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_APPROX);
+    GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_HINT_APPROX);
 
-    cv::Mat diff;
-    cv::absdiff(dst, gt, diff);
-    cv::Mat flatten_diff = diff.reshape(1, diff.rows);
-
-    int nz = countNonZero(flatten_diff);
-    EXPECT_LE(nz, 0.06*src.total()); // Less 6% of different pixels
-
-    double min_val, max_val;
-    minMaxLoc(flatten_diff, &min_val, &max_val);
-    EXPECT_LE(max_val, 2); // expectes results floating +-1
+    EXPECT_LE(cvtest::norm(dst, gt, NORM_INF), 1);
+    EXPECT_LE(cvtest::norm(dst, gt, NORM_L1 | NORM_RELATIVE), 0.06); // Less 6% of different pixels
 }
 
 INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact,

From e90935e81cd83973af7b5cb15c770a210c565e10 Mon Sep 17 00:00:00 2001
From: j3knk <96484997+j3knk@users.noreply.github.com>
Date: Mon, 15 Jul 2024 14:10:08 +0200
Subject: [PATCH 37/39] Merge pull request #25824 from
 j3knk:calib3d/fix_projectpoints

calib3d: fix Rodrigues CV_32F and CV_64F type mismatch in projectPoints #25824

Fixes #25318

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../misc/python/test/test_calibration.py        | 17 ++++++++++++++++-
 modules/calib3d/src/calibration.cpp             | 10 ++++++----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/modules/calib3d/misc/python/test/test_calibration.py b/modules/calib3d/misc/python/test/test_calibration.py
index f2cf4cc493..c3093ce2ef 100644
--- a/modules/calib3d/misc/python/test/test_calibration.py
+++ b/modules/calib3d/misc/python/test/test_calibration.py
@@ -67,7 +67,22 @@ class calibration_test(NewOpenCVTests):
         self.assertLess(cv.norm(camera_matrix - cameraMatrixTest, cv.NORM_L1), normCamEps)
         self.assertLess(cv.norm(dist_coefs - distCoeffsTest, cv.NORM_L1), normDistEps)
 
-
+    def test_projectPoints(self):
+        objectPoints = np.array([[181.24588 ,  87.80361 ,  11.421074],
+            [ 87.17948 , 184.75563 ,  37.223446],
+            [ 22.558456,  45.495266, 246.05797 ]], dtype=np.float32)
+        rvec = np.array([[ 0.9357548 , -0.28316498,  0.21019171],
+            [ 0.30293274,  0.9505806 , -0.06803132],
+            [-0.18054008,  0.12733458,  0.9752903 ]], dtype=np.float32)
+        tvec = np.array([ 69.32692 ,  17.602057, 135.77672 ], dtype=np.float32)
+        cameraMatrix = np.array([[214.0047  ,  26.98735 , 253.37799 ],
+            [189.8172  ,  10.038101,  18.862494],
+            [114.07123 , 200.87277 , 194.56332 ]], dtype=np.float32)
+        distCoeffs = distCoeffs = np.zeros((4, 1), dtype=np.float32)
+
+        imagePoints, jacobian = cv.projectPoints(objectPoints, rvec, tvec, cameraMatrix, distCoeffs)
+        self.assertTrue(imagePoints is not None)
+        self.assertTrue(jacobian is not None)
 
 if __name__ == '__main__':
     NewOpenCVTests.bootstrap()
diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index c428da6bd5..9729cb7bea 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -538,11 +538,11 @@ static void cvProjectPoints2Internal( const CvMat* objectPoints,
     int calc_derivatives;
     const CvPoint3D64f* M;
     CvPoint2D64f* m;
-    double r[3], R[9], dRdr[27], t[3], a[9], k[14] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0}, fx, fy, cx, cy;
+    double r[3], R[9], R_vec[9], dRdr[27], t[3], a[9], k[14] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0}, fx, fy, cx, cy;
     Matx33d matTilt = Matx33d::eye();
     Matx33d dMatTiltdTauX(0,0,0,0,0,0,0,-1,0);
     Matx33d dMatTiltdTauY(0,0,0,0,0,0,1,0,0);
-    CvMat _r, _t, _a = cvMat( 3, 3, CV_64F, a ), _k;
+    CvMat _r, _r_vec, _t, _a = cvMat( 3, 3, CV_64F, a ), _k;
     CvMat matR = cvMat( 3, 3, CV_64F, R ), _dRdr = cvMat( 3, 9, CV_64F, dRdr );
     double *dpdr_p = 0, *dpdt_p = 0, *dpdk_p = 0, *dpdf_p = 0, *dpdc_p = 0;
     double* dpdo_p = 0;
@@ -593,9 +593,11 @@ static void cvProjectPoints2Internal( const CvMat* objectPoints,
     if( r_vec->rows == 3 && r_vec->cols == 3 )
     {
         _r = cvMat( 3, 1, CV_64FC1, r );
-        cvRodrigues2( r_vec, &_r );
+        _r_vec = cvMat( r_vec->rows, r_vec->cols, CV_MAKETYPE(CV_64F,CV_MAT_CN(r_vec->type)), R_vec );
+        cvConvert( r_vec, &_r_vec );
+        cvRodrigues2( &_r_vec, &_r );
         cvRodrigues2( &_r, &matR, &_dRdr );
-        cvCopy( r_vec, &matR );
+        cvCopy( &_r_vec, &matR );
     }
     else
     {

From c53c2f68446b485e79814388c1a55d2f06591f95 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@xperience.ai>
Date: Mon, 15 Jul 2024 16:11:27 +0300
Subject: [PATCH 38/39] Use CV_LOG_DEBUG for debug logging in chessboard
 detector.

---
 modules/calib3d/src/chessboard.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/calib3d/src/chessboard.cpp b/modules/calib3d/src/chessboard.cpp
index ffbddadf40..bded912985 100644
--- a/modules/calib3d/src/chessboard.cpp
+++ b/modules/calib3d/src/chessboard.cpp
@@ -1657,7 +1657,7 @@ bool Chessboard::Board::normalizeMarkerOrientation()
         //check for ambiguity
         if(rowCount()-pcell->bottom->getRow() > 2)
         {
-           // std::cout << "FIX board " << pcell->bottom->getRow() << " " << rowCount();
+            CV_LOG_DEBUG(NULL, "FIX board " << pcell->bottom->getRow() << " " << rowCount());
             flipVertical();
             rotateRight();
         }
@@ -2259,7 +2259,7 @@ int Chessboard::Board::detectMarkers(cv::InputArray image)
                 cell->marker = noise-signal > (noise-reference)*0.5;
             if(cell->marker)
                 count++;
-            // std::cout << x << "/" << y << " signal " << signal << " noise " << noise << " reference " << reference  << " has marker " << int(cell->marker) << std::endl;
+            CV_LOG_DEBUG(NULL, "Cell: " << x << "/" << y << " signal " << signal << " noise " << noise << " reference " << reference  << " has marker " << int(cell->marker));
         }
     }
     return count;
@@ -3373,7 +3373,7 @@ cv::Scalar Chessboard::Board::calcEdgeSharpness(cv::InputArray _img,float rise_d
     }
     if(count == 0)
     {
-        std::cout  <<"calcEdgeSharpness: checkerboard too small for calculation." << std::endl;
+        CV_LOG_DEBUG(NULL, "calcEdgeSharpness: checkerboard too small for calculation.");
         return cv::Scalar::all(9999);
     }
     sharpness = sharpness/float(count);

From 4842043c6afdd596eabd116d4180520e4392c815 Mon Sep 17 00:00:00 2001
From: Yoshiki Obinata <27789460+mqcmd196@users.noreply.github.com>
Date: Mon, 15 Jul 2024 23:06:30 +0900
Subject: [PATCH 39/39] Merge pull request #25822 from mqcmd196:gtk3-gl-support

Support OpenGL GTK3 New API #25822

Fixes #20001

GSoC2024 Project

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 CMakeLists.txt                               |   5 +-
 cmake/OpenCVFindLibsGUI.cmake                |   2 +-
 modules/core/include/opencv2/core/opengl.hpp |   2 +-
 modules/highgui/CMakeLists.txt               |   3 +
 modules/highgui/cmake/detect_gtk.cmake       |  28 +--
 modules/highgui/src/window_gtk.cpp           | 110 ++++++++++--
 samples/opengl/CMakeLists.txt                |  10 ++
 samples/opengl/opengl3_2.cpp                 | 169 +++++++++++++++++++
 8 files changed, 301 insertions(+), 28 deletions(-)
 create mode 100644 samples/opengl/opengl3_2.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c196d0f2be..f00ac637d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1468,9 +1468,12 @@ if(WITH_GTK OR HAVE_GTK)
   else()
     status("    GTK+:" "NO")
   endif()
+
   if(HAVE_GTK)
     status(  "      GThread :" HAVE_GTHREAD THEN "YES (ver ${GTHREAD_VERSION})" ELSE NO)
-    status(  "      GtkGlExt:" HAVE_GTKGLEXT THEN "YES (ver ${GTKGLEXT_VERSION})" ELSE NO)
+    if(NOT HAVE_GTK3)
+      status(  "    GtkGlExt:" HAVE_GTKGLEXT THEN "YES (ver ${GTKGLEXT_VERSION})" ELSE NO)
+    endif()
   endif()
 endif()
 
diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake
index fca3e2c52d..735aae3475 100644
--- a/cmake/OpenCVFindLibsGUI.cmake
+++ b/cmake/OpenCVFindLibsGUI.cmake
@@ -63,7 +63,7 @@ endif()
 ocv_update(OpenGL_GL_PREFERENCE LEGACY)
 ocv_clear_vars(HAVE_OPENGL HAVE_QT_OPENGL)
 if(WITH_OPENGL)
-  if(WITH_WIN32UI OR (HAVE_QT AND QT_QTOPENGL_FOUND) OR HAVE_GTKGLEXT)
+  if(WITH_WIN32UI OR (HAVE_QT AND QT_QTOPENGL_FOUND) OR HAVE_GTK3 OR (HAVE_GTK AND NOT HAVE_GTK3 AND HAVE_GTKGLEXT))
     find_package (OpenGL QUIET)
     if(OPENGL_FOUND)
       set(HAVE_OPENGL TRUE)
diff --git a/modules/core/include/opencv2/core/opengl.hpp b/modules/core/include/opencv2/core/opengl.hpp
index fceb85bd06..5d19f81f85 100644
--- a/modules/core/include/opencv2/core/opengl.hpp
+++ b/modules/core/include/opencv2/core/opengl.hpp
@@ -57,7 +57,7 @@ This section describes OpenGL interoperability.
 
 To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is
 supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
-supported). For GTK backend gtkglext-1.0 library is required.
+supported). For GTK-2.0 backend gtkglext-1.0 library is required.
 
 To use OpenGL functionality you should first create OpenGL context (window or frame buffer). You can
 do this with namedWindow function or with other OpenGL toolkit (GLUT, for example).
diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt
index 0108626dfd..c1d42444c5 100644
--- a/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@ -218,6 +218,9 @@ if(TARGET ocv.3rdparty.gtk3 OR TARGET ocv.3rdparty.gtk2)
   )
     if(__gtk_dependency STREQUAL "ocv.3rdparty.gtk3")
       set(OPENCV_HIGHGUI_BUILTIN_BACKEND "GTK3")
+      if(OPENGL_LIBRARIES)
+        list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}")
+      endif()
     elseif(__gtk_dependency STREQUAL "ocv.3rdparty.gtk2")
       set(OPENCV_HIGHGUI_BUILTIN_BACKEND "GTK2")
     else()
diff --git a/modules/highgui/cmake/detect_gtk.cmake b/modules/highgui/cmake/detect_gtk.cmake
index b7f53d1a9b..fdca580aa9 100644
--- a/modules/highgui/cmake/detect_gtk.cmake
+++ b/modules/highgui/cmake/detect_gtk.cmake
@@ -26,25 +26,27 @@ if(WITH_GTK)
   else()
     ocv_add_external_target(gthread "${GTHREAD_INCLUDE_DIRS}" "${GTHREAD_LIBRARIES}" "HAVE_GTHREAD")
   endif()
-  if((WITH_OPENGL OR HAVE_OPENGL) AND HAVE_GTK2)
-    ocv_check_modules(GTKGLEXT gtkglext-1.0)
-    if(HAVE_GTKGLEXT)
-      # HACK for https://github.com/opencv/opencv/issues/20850
-      # pkg-config reports some include directories that do not exist. Just filter them out.
-      set(GTKGLEXT_INCLUDE_DIRS_EXISTS "")
-      foreach(p ${GTKGLEXT_INCLUDE_DIRS})
-        if (EXISTS "${p}")
-          list(APPEND GTKGLEXT_INCLUDE_DIRS_EXISTS "${p}")
-        endif()
-      endforeach()
-      ocv_add_external_target(gtkglext "${GTKGLEXT_INCLUDE_DIRS_EXISTS}" "${GTKGLEXT_LIBRARIES}" "HAVE_GTKGLEXT")
+  if((WITH_OPENGL OR HAVE_OPENGL) AND (HAVE_GTK2 OR HAVE_GTK3))
+    if(HAVE_GTK2)
+      ocv_check_modules(GTKGLEXT gtkglext-1.0)
+      if(HAVE_GTKGLEXT)
+        # HACK for https://github.com/opencv/opencv/issues/20850
+        # pkg-config reports some include directories that do not exist. Just filter them out.
+        set(GTKGLEXT_INCLUDE_DIRS_EXISTS "")
+        foreach(p ${GTKGLEXT_INCLUDE_DIRS})
+          if (EXISTS "${p}")
+            list(APPEND GTKGLEXT_INCLUDE_DIRS_EXISTS "${p}")
+          endif()
+        endforeach()
+        ocv_add_external_target(gtkglext "${GTKGLEXT_INCLUDE_DIRS}" "${GTKGLEXT_LIBRARIES}" "HAVE_GTKGLEXT")
+      endif()
     endif()
   endif()
 elseif(HAVE_GTK)
   ocv_add_external_target(gtk "${GTK_INCLUDE_DIRS}" "${GTK_LIBRARIES}" "${GTK_DEFINES};HAVE_GTK")
 endif()
 
-if(WITH_OPENGL AND HAVE_GTKGLEXT)
+if(WITH_OPENGL)
   find_package(OpenGL QUIET)
   if(OPENGL_FOUND)
     set(HAVE_OPENGL TRUE)
diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp
index 88421be8b8..aa4c8ea634 100644
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@@ -46,10 +46,7 @@
 
 #include <gtk/gtk.h>
 
-#if (GTK_MAJOR_VERSION == 3) && defined(HAVE_OPENGL)
-  #undef HAVE_OPENGL  // no support with GTK3
-#endif
-#if defined(HAVE_OPENGL) && !defined(HAVE_GTKGLEXT)
+#if (GTK_MAJOR_VERSION == 2) && defined(HAVE_OPENGL) && !defined(HAVE_GTKGLEXT)
   #undef HAVE_OPENGL  // gtkglext is required
 #endif
 
@@ -68,9 +65,13 @@
 #endif
 
 #ifdef HAVE_OPENGL
+  #ifdef GTK_VERSION3
+    #include <gtk/gtkglarea.h>
+  #else
     #include <gtk/gtkgl.h>
-    #include <GL/gl.h>
     #include <GL/glu.h>
+  #endif
+  #include <GL/gl.h>
 #endif
 
 #include <opencv2/core/utils/logger.hpp>
@@ -570,7 +571,7 @@ struct CvWindow : CvUIBase
         last_key(0), flags(0), status(0),
         on_mouse(NULL), on_mouse_param(NULL)
 #ifdef HAVE_OPENGL
-        ,useGl(false), glDrawCallback(NULL), glDrawData(NULL)
+        ,useGl(false), glDrawCallback(NULL), glDrawData(NULL), glArea(NULL)
 #endif
     {
         CV_LOG_INFO(NULL, "OpenCV/UI: creating GTK window: " << window_name);
@@ -597,6 +598,7 @@ struct CvWindow : CvUIBase
 
     CvOpenGlDrawCallback glDrawCallback;
     void* glDrawData;
+    GtkWidget* glArea;
 #endif
 };
 
@@ -640,7 +642,7 @@ CV_IMPL int cvInitSystem( int argc, char** argv )
 
         setlocale(LC_NUMERIC,"C");
 
-        #ifdef HAVE_OPENGL
+        #if defined(HAVE_OPENGL) && not defined(GTK_VERSION3) // GTK3+ uses GtkGLArea so no need to check for GtkGLExt
             if (!gtk_gl_init_check(&argc, &argv))
             {
                 hasError = true;
@@ -907,11 +909,42 @@ double cvGetOpenGlProp_GTK(const char* name)
 // OpenGL support
 
 #ifdef HAVE_OPENGL
-
 namespace
 {
+
+#ifdef GTK_VERSION3
+
+    void glRealizeCallback(GtkGLArea* area, gpointer user_data) {
+        CV_UNUSED(user_data);
+        gtk_gl_area_make_current(area);
+        if (gtk_gl_area_get_error(area) != NULL)
+            CV_Error(cv::Error::OpenGlApiCallError, "OpenGL context is not initialized");
+    }
+
+    gboolean glRenderCallback(GtkGLArea* area, GdkGLContext* context, gpointer user_data) {
+        CV_UNUSED(context);
+        CvWindow* window = (CvWindow*)user_data;
+        gtk_gl_area_make_current(area);
+        if (gtk_gl_area_get_error(area) != NULL) {
+            CV_Error(cv::Error::OpenGlApiCallError, "OpenGL context is not initialized");
+            return FALSE;
+        }
+        if(window->glDrawCallback) {
+            window->glDrawCallback(window->glDrawData);
+        }
+//        gtk_gl_area_queue_render(area);
+        return TRUE;
+    }
+
+#endif
+
     void createGlContext(CvWindow* window)
     {
+        #ifdef GTK_VERSION3
+        g_signal_connect(window->glArea, "realize", G_CALLBACK(glRealizeCallback), window);
+        g_signal_connect(window->glArea, "render", G_CALLBACK(glRenderCallback), window);
+        #else
+
         GdkGLConfig* glconfig;
 
         // Try double-buffered visual
@@ -923,11 +956,24 @@ namespace
         if (!gtk_widget_set_gl_capability(window->widget, glconfig, NULL, TRUE, GDK_GL_RGBA_TYPE))
             CV_Error( cv::Error::OpenGlApiCallError, "Can't Create A GL Device Context" );
 
+        #endif
+
         window->useGl = true;
     }
 
     void drawGl(CvWindow* window)
     {
+        #ifdef GTK_VERSION3
+
+        GtkGLArea* gtkGlArea = GTK_GL_AREA(window->glArea);
+        if (gtk_gl_area_get_error(gtkGlArea) != NULL)
+            CV_Error(cv::Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context");
+
+        if (window->glDrawCallback)
+            window->glDrawCallback(window->glDrawData);
+
+        #else
+
         GdkGLContext* glcontext = gtk_widget_get_gl_context(window->widget);
         GdkGLDrawable* gldrawable = gtk_widget_get_gl_drawable(window->widget);
 
@@ -947,6 +993,8 @@ namespace
             glFlush();
 
         gdk_gl_drawable_gl_end(gldrawable);
+
+        #endif
     }
 }
 
@@ -1041,12 +1089,27 @@ static std::shared_ptr<CvWindow> namedWindow_(const std::string& name, int flags
 
     window->frame = gtk_window_new( GTK_WINDOW_TOPLEVEL );
 
-    window->paned = gtk_vbox_new( FALSE, 0 );
     window->widget = cvImageWidgetNew( flags );
+
+#if defined(HAVE_OPENGL) && defined(GTK_VERSION3)
+    if (flags & cv::WINDOW_OPENGL) {
+        window->glArea = gtk_gl_area_new();
+        gtk_container_add(GTK_CONTAINER(window->frame), window->glArea);
+        gtk_widget_show(window->glArea);
+    } else {
+        window->paned = gtk_vbox_new( FALSE, 0 );
+        gtk_box_pack_end( GTK_BOX(window->paned), window->widget, TRUE, TRUE, 0 );
+        gtk_widget_show( window->widget );
+        gtk_container_add( GTK_CONTAINER(window->frame), window->paned );
+        gtk_widget_show( window->paned );
+    }
+#else
+    window->paned = gtk_vbox_new( FALSE, 0 );
     gtk_box_pack_end( GTK_BOX(window->paned), window->widget, TRUE, TRUE, 0 );
     gtk_widget_show( window->widget );
     gtk_container_add( GTK_CONTAINER(window->frame), window->paned );
     gtk_widget_show( window->paned );
+#endif
 
 #ifndef HAVE_OPENGL
     if (flags & cv::WINDOW_OPENGL)
@@ -1122,9 +1185,6 @@ static std::shared_ptr<CvWindow> namedWindow_(const std::string& name, int flags
 
 CV_IMPL void cvSetOpenGlContext(const char* name)
 {
-    GdkGLContext* glcontext;
-    GdkGLDrawable* gldrawable;
-
     CV_Assert(name && "NULL name string");
 
     CV_LOCK_MUTEX();
@@ -1136,11 +1196,24 @@ CV_IMPL void cvSetOpenGlContext(const char* name)
     if (!window->useGl)
         CV_Error( cv::Error::OpenGlNotSupported, "Window doesn't support OpenGL" );
 
+#ifdef GTK_VERSION3
+
+    if(gtk_gl_area_get_error(GTK_GL_AREA(window->glArea)) != NULL)
+        CV_Error( cv::Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context");
+
+#else
+
+    GdkGLContext* glcontext;
+    GdkGLDrawable* gldrawable;
+
     glcontext = gtk_widget_get_gl_context(window->widget);
     gldrawable = gtk_widget_get_gl_drawable(window->widget);
 
     if (!gdk_gl_drawable_make_current(gldrawable, glcontext))
         CV_Error( cv::Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context" );
+
+#endif
+
 }
 
 CV_IMPL void cvUpdateWindow(const char* name)
@@ -1154,7 +1227,20 @@ CV_IMPL void cvUpdateWindow(const char* name)
         return;
 
     // window does not refresh without this
+#ifdef GTK_VERSION3
+
+    if ( GTK_IS_GL_AREA(window->glArea) ){
+        gtk_gl_area_queue_render(GTK_GL_AREA(window->glArea));
+    } else {
+        gtk_widget_queue_draw( GTK_WIDGET(window->widget));
+    }
+
+#else
+
     gtk_widget_queue_draw( GTK_WIDGET(window->widget) );
+
+#endif
+
 }
 
 CV_IMPL void cvSetOpenGlDrawCallback(const char* name, CvOpenGlDrawCallback callback, void* userdata)
diff --git a/samples/opengl/CMakeLists.txt b/samples/opengl/CMakeLists.txt
index 158151c300..c31b141e21 100644
--- a/samples/opengl/CMakeLists.txt
+++ b/samples/opengl/CMakeLists.txt
@@ -6,6 +6,9 @@ if(UNIX)
   find_package(X11 QUIET)
 endif()
 
+find_package(PkgConfig QUIET)
+pkg_search_module(EPOXY QUIET epoxy)
+
 SET(OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS
   opencv_core
   opencv_imgproc
@@ -21,6 +24,9 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
   if(NOT X11_FOUND)
     ocv_list_filterout(all_samples "opengl_interop")
   endif()
+  if(NOT EPOXY_FOUND)
+    ocv_list_filterout(all_samples "opengl3_2")
+  endif()
   foreach(sample_filename ${all_samples})
     ocv_define_sample(tgt ${sample_filename} opengl)
     ocv_target_link_libraries(${tgt} PRIVATE "${OPENGL_LIBRARIES}" "${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS}")
@@ -28,6 +34,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
       ocv_target_link_libraries(${tgt} PRIVATE ${X11_LIBRARIES})
       ocv_target_include_directories(${tgt} ${X11_INCLUDE_DIR})
     endif()
+    if(sample_filename STREQUAL "opengl3_2.cpp")
+      ocv_target_link_libraries(${tgt} PRIVATE ${EPOXY_LIBRARIES})
+      ocv_target_include_directories(${tgt} PRIVATE ${EPOXY_INCLUDE_DIRS})
+    endif()
   endforeach()
 endif()
 
diff --git a/samples/opengl/opengl3_2.cpp b/samples/opengl/opengl3_2.cpp
new file mode 100644
index 0000000000..1a8e9a804c
--- /dev/null
+++ b/samples/opengl/opengl3_2.cpp
@@ -0,0 +1,169 @@
+#include <iostream>
+
+#include <epoxy/gl.h>
+
+#ifdef _WIN32
+    #define WIN32_LEAN_AND_MEAN 1
+    #define NOMINMAX 1
+    #include <windows.h>
+#endif
+
+#if defined(__APPLE__)
+    #include <OpenGL/gl.h>
+    #include <OpenGL/glu.h>
+#else
+    #include <GL/gl.h>
+    #include <GL/glu.h>
+#endif
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/opengl.hpp"
+#include "opencv2/core/cuda.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+const int win_width = 800;
+const int win_height = 640;
+
+struct DrawData
+{
+    GLuint vao, vbo, program, textureID;
+};
+
+static cv::Mat rot(float angle)
+{
+    cv::Mat R_y = (cv::Mat_<float>(4,4) <<
+        cos(angle), 0, sin(angle), 0,
+        0, 1, 0, 0,
+        -sin(angle), 0, cos(angle), 0,
+        0, 0, 0, 1);
+
+    return R_y;
+}
+
+static GLuint create_shader(const char* source, GLenum type) {
+    GLuint shader = glCreateShader(type);
+    glShaderSource(shader, 1, &source, NULL);
+    glCompileShader(shader);
+    return shader;
+}
+
+static void draw(void* userdata) {
+    DrawData* data = static_cast<DrawData*>(userdata);
+    static float angle = 0.0f;
+    angle += 1.f;
+
+    cv::Mat trans = rot(CV_PI * angle / 360.f);
+
+    glClearColor(0.0, 0.0, 0.0, 1.0);
+    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+    glUseProgram(data->program);
+    glUniformMatrix4fv(glGetUniformLocation(data->program, "transform"), 1, GL_FALSE, trans.ptr<float>());
+    glBindTexture(GL_TEXTURE_2D, data->textureID);
+    glBindVertexArray(data->vao);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    glBindVertexArray(0);
+}
+
+int main(int argc, char* argv[])
+{
+    string filename;
+    if (argc < 2)
+    {
+        cout << "Usage: " << argv[0] << " image" << endl;
+        filename = "baboon.jpg";
+    }
+    else
+        filename = argv[1];
+
+    Mat img = imread(samples::findFile(filename));
+    if (img.empty())
+    {
+        cerr << "Can't open image " << filename << endl;
+        return -1;
+    }
+    flip(img, img, 0);
+
+    namedWindow("OpenGL", WINDOW_OPENGL);
+    resizeWindow("OpenGL", win_width, win_height);
+
+    DrawData data;
+
+    glEnable(GL_DEPTH_TEST);
+    const char *vertex_shader_source =
+            "#version 330 core\n"
+            "layout (location = 0) in vec3 position;\n"
+            "layout (location = 1) in vec2 texCoord;\n"
+            "out vec2 TexCoord;\n"
+            "uniform mat4 transform;\n"
+            "void main() {\n"
+            "   gl_Position = transform * vec4(position, 1.0);\n"
+            "   TexCoord = texCoord;\n"
+            "}\n";
+    const char *fragment_shader_source =
+            "#version 330 core\n"
+            "in vec2 TexCoord;\n"
+            "out vec4 color;\n"
+            "uniform sampler2D ourTexture;\n"
+            "void main() {\n"
+            "   color = texture(ourTexture, TexCoord);\n"
+            "}\n";
+    data.program = glCreateProgram();
+    GLuint vertex_shader = create_shader(vertex_shader_source, GL_VERTEX_SHADER);
+    GLuint fragment_shader = create_shader(fragment_shader_source, GL_FRAGMENT_SHADER);
+    glAttachShader(data.program, vertex_shader);
+    glAttachShader(data.program, fragment_shader);
+    glLinkProgram(data.program);
+    glUseProgram(data.program);
+
+    GLfloat vertices[] = {
+            // Positions        // Texture Coords
+            1.0f,  1.0f, 0.0f,  1.0f, 1.0f,   // Top Right
+            1.0f, -1.0f, 0.0f,  1.0f, 0.0f,   // Bottom Right
+            -1.0f,  1.0f, 0.0f,  0.0f, 1.0f,   // Top Left
+            -1.0f, -1.0f, 0.0f,  0.0f, 0.0f    // Bottom Left
+    };
+
+    glGenVertexArrays(1, &data.vao);
+    glGenBuffers(1, &data.vbo);
+    glBindVertexArray(data.vao);
+    glBindBuffer(GL_ARRAY_BUFFER, data.vbo);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+    // Position attribute
+    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0);
+    glEnableVertexAttribArray(0);
+    // Texture Coord attribute
+    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
+    glEnableVertexAttribArray(1);
+    glBindVertexArray(0); // Unbind VAO
+
+
+//        Image to texture
+    glGenTextures(1, &data.textureID);
+    glBindTexture(GL_TEXTURE_2D, data.textureID);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, img.cols, img.rows, 0, GL_BGR, GL_UNSIGNED_BYTE, img.data);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glBindTexture(GL_TEXTURE_2D, 0);
+
+    setOpenGlDrawCallback("OpenGL", draw, &data);
+
+    for (;;)
+    {
+        updateWindow("OpenGL");
+        char key = (char)waitKey(40);
+        if (key == 27)
+            break;
+    }
+
+    setOpenGlDrawCallback("OpenGL", 0, 0);
+    destroyAllWindows();
+    return 0;
+}