Merge branch 4.x

4 months ago · bd3b734731
parent ddfb5cf776 5741f22f56
commit bd3b734731
24 changed files with 311 additions and 62 deletions
--- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp
+++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
@ -75,61 +75,137 @@ namespace cv { namespace cuda {
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 .
@param dst Destination matrix that has the same size and number of channels as the input array(s).
-The depth is defined by dtype or src1 depth.
+The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
 destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.

-@sa add
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref addWithScalar for the scalar variant.
+
+@sa cv::add, addWithScalar
 */
 CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null());

+/** @brief Computes a matrix-scalar sum.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and number of channels as the input array.
+The depth is defined by dtype or @p src1 depth.
+@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
+destination array to be changed. The mask can be used only with single channel images.
+@param dtype Optional depth of the output array.
+@param stream Stream for the asynchronous version.
+
+@sa add
+ */
+CV_EXPORTS_W void inline addWithScalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) {
+    add(src1, src2, dst, mask, dtype, stream);
+}
+
 /** @brief Computes a matrix-matrix or matrix-scalar difference.

@param src1 First source matrix or scalar.
-@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 .
+@param src2 Second source matrix or scalar. Matrix should have the same size and type as @p src1.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
-The depth is defined by dtype or src1 depth.
+The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
 destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.

-@sa subtract
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref subtractWithScalar for the scalar variant.
+
+@sa cv::subtract, subtractWithScalar
 */
 CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null());

+/** @brief Computes a matrix-scalar difference.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and number of channels as the input array.
+The depth is defined by dtype or @p src1 depth.
+@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
+destination array to be changed. The mask can be used only with single channel images.
+@param dtype Optional depth of the output array.
+@param stream Stream for the asynchronous version.
+
+@sa subtract
+ */
+CV_EXPORTS_W void inline subtractWithScalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) {
+    subtract(src1, src2, dst, mask, dtype, stream);
+}
+
 /** @brief Computes a matrix-matrix or matrix-scalar per-element product.

@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
-The depth is defined by dtype or src1 depth.
+The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.

-@sa multiply
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref multiplyWithScalar for the scalar variant.
+
+@sa cv::multiply, multiplyWithScalar
 */
 CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());

+/** @brief Computes a matrix-scalar per-element product.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and number of channels as the input array.
+The depth is defined by dtype or @p src1 depth.
+@param scale Optional scale factor.
+@param dtype Optional depth of the output array.
+@param stream Stream for the asynchronous version.
+
+@sa multiply
+ */
+CV_EXPORTS_W void inline multiplyWithScalar(InputArray src1, Scalar src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) {
+    multiply(src1, src2, dst, scale, dtype, stream);
+}
+
 /** @brief Computes a matrix-matrix or matrix-scalar division.

-@param src1 First source matrix or a scalar.
+@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
-The depth is defined by dtype or src1 depth.
+The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.

 This function, in contrast to divide, uses a round-down rounding mode.

-@sa divide
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref divideWithScalar for the scalar variant.
+
+@sa cv::divide, divideWithScalar
 */
 CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());

+/** @brief Computes a matrix-scalar division.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and number of channels as the input array.
+The depth is defined by dtype or @p src1 depth.
+@param scale Optional scale factor.
+@param dtype Optional depth of the output array.
+@param stream Stream for the asynchronous version.
+
+This function, in contrast to cv::divide, uses a round-down rounding mode.
+
+@sa divide
+ */
+CV_EXPORTS_W void inline divideWithScalar(InputArray src1, Scalar src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) {
+    divide(src1, src2, dst, scale, dtype, stream);
+}
+
 /** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar).

@param src1 First source matrix or scalar.
@ -137,10 +213,25 @@ CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, doub
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.

-@sa absdiff
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref absdiffWithScalar for the scalar variant.
+
+@sa cv::absdiff, absdiffWithScalar
 */
 CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());

+/** @brief Computes per-element absolute difference of a matrix and scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param stream Stream for the asynchronous version.
+
+@sa absdiff
+ */
+CV_EXPORTS_W void inline absdiffWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
+    absdiff(src1, src2, dst, stream);
+}
+
 /** @brief Computes an absolute value of each matrix element.

@param src Source matrix.
@ -218,10 +309,32 @@ CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst, Stream& str
 -   **CMP_NE:** a(.) != b(.)
@param stream Stream for the asynchronous version.

-@sa compare
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref compareWithScalar for the scalar variant.
+
+@sa cv::compare, compareWithScalar
 */
 CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null());

+/** @brief Compares elements of a matrix and scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size as the input array and type \ref CV_8U.
+@param cmpop Flag specifying the relation between the elements to be checked:
+-   **CMP_EQ:** a(.) == b(.)
+-   **CMP_GT:** a(.) \> b(.)
+-   **CMP_GE:** a(.) \>= b(.)
+-   **CMP_LT:** a(.) \< b(.)
+-   **CMP_LE:** a(.) \<= b(.)
+-   **CMP_NE:** a(.) != b(.)
+@param stream Stream for the asynchronous version.
+
+@sa compare
+ */
+CV_EXPORTS_W void inline compareWithScalar(InputArray src1, Scalar src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()) {
+    compare(src1, src2, dst, cmpop, stream);
+}
+
 /** @brief Performs a per-element bitwise inversion.

@param src Source matrix.
@ -240,9 +353,28 @@ CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, InputArray mask =
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
 destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
+
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref bitwise_or_with_scalar for the scalar variant.
+
+@sa cv::bitwise_or, bitwise_or_with_scalar
 */
 CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());

+/** @brief Performs a per-element bitwise disjunction of a matrix and scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
+destination array to be changed. The mask can be used only with single channel images.
+@param stream Stream for the asynchronous version.
+
+@sa bitwise_or
+ */
+CV_EXPORTS_W void inline bitwise_or_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
+    bitwise_or(src1, src2, dst, mask, stream);
+}
+
 /** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar).

@param src1 First source matrix or scalar.
@ -251,20 +383,58 @@ CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst,
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
 destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
+
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref bitwise_and_with_scalar for the scalar variant.
+
+@sa cv::bitwise_and, bitwise_and_with_scalar
 */
 CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());

+/** @brief Performs a per-element bitwise conjunction of a matrix and a scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
+destination array to be changed. The mask can be used only with single channel images.
+@param stream Stream for the asynchronous version.
+
+@sa bitwise_and
+ */
+CV_EXPORTS_W void inline bitwise_and_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
+    bitwise_and(src1, src2, dst, mask, stream);
+}
+
 /** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and scalar).

@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
-@param dst Destination matrix that has the same size and type as the input array(s).
+@param dst Destination matrix that has the same size and type as the input array(s).
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
 destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
+
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref bitwise_xor_with_scalar for the scalar variant.
+
+@sa cv::bitwise_xor, bitwise_xor_with_scalar
 */
 CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());

+/** @brief Performs a per-element bitwise exclusive or operation of a matrix and a scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
+destination array to be changed. The mask can be used only with single channel images.
+@param stream Stream for the asynchronous version.
+
+@sa bitwise_xor
+ */
+CV_EXPORTS_W void inline bitwise_xor_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
+    bitwise_xor(src1, src2, dst, mask, stream);
+}
+
 /** @brief Performs pixel by pixel right shift of an image by a constant value.

@param src Source matrix. Supports 1, 3 and 4 channels images with integers elements.
@ -299,10 +469,25 @@ CV_WRAP inline void lshift(InputArray src, Scalar val, OutputArray dst, Stream&
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.

-@sa min
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref minWithScalar for the scalar variant.
+
+@sa cv::min, minWithScalar
 */
 CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());

+/** @brief Computes the per-element minimum of a matrix and a scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param stream Stream for the asynchronous version.
+
+@sa min
+ */
+CV_EXPORTS_W void inline minWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
+    min(src1, src2, dst, stream);
+}
+
 /** @brief Computes the per-element maximum of two matrices (or a matrix and a scalar).

@param src1 First source matrix or scalar.
@ -310,10 +495,25 @@ CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream&
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.

-@sa max
+@warning In Python both @p src1 and @p src2 have to be matrices; see @ref maxWithScalar for the scalar variant.
+
+@sa cv::max, maxWithScalar
 */
 CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());

+/** @brief Computes the per-element maximum of a matrix and a scalar.
+
+@param src1 First source matrix.
+@param src2 Second source scalar.
+@param dst Destination matrix that has the same size and type as the input array.
+@param stream Stream for the asynchronous version.
+
+@sa max
+ */
+CV_EXPORTS_W void inline maxWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
+    max(src1, src2, dst, stream);
+}
+
 /** @brief Computes the weighted sum of two arrays.

@param src1 First source array.
--- a/modules/cudaarithm/misc/python/test/test_cudaarithm.py
+++ b/modules/cudaarithm/misc/python/test/test_cudaarithm.py
@ -38,6 +38,7 @@ class cudaarithm_test(NewOpenCVTests):
    def test_arithmetic(self):
        npMat1 = np.random.random((128, 128, 3)) - 0.5
        npMat2 = np.random.random((128, 128, 3)) - 0.5
+        scalar = np.random.random()

        cuMat1 = cv.cuda_GpuMat()
        cuMat2 = cv.cuda_GpuMat()
@ -48,36 +49,54 @@ class cudaarithm_test(NewOpenCVTests):
        self.assertTrue(np.allclose(cv.cuda.add(cuMat1, cuMat2).download(),
                                         cv.add(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.addWithScalar(cuMat1, [scalar]*3).download(),
+                                    cv.add(npMat1, scalar)))
+
        cv.cuda.add(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.add(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.subtract(cuMat1, cuMat2).download(),
                                         cv.subtract(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.subtractWithScalar(cuMat1, [scalar]*3).download(),
+                                         cv.subtract(npMat1, scalar)))
+
        cv.cuda.subtract(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.subtract(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.multiply(cuMat1, cuMat2).download(),
                                         cv.multiply(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.multiplyWithScalar(cuMat1, [scalar]*3).download(),
+                                         cv.multiply(npMat1, scalar)))
+
        cv.cuda.multiply(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.multiply(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.divide(cuMat1, cuMat2).download(),
                                         cv.divide(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.divideWithScalar(cuMat1, [scalar]*3).download(),
+                                         cv.divide(npMat1, scalar)))
+
        cv.cuda.divide(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.divide(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.absdiff(cuMat1, cuMat2).download(),
                                         cv.absdiff(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.absdiffWithScalar(cuMat1, [scalar]*3).download(),
+                                         cv.absdiff(npMat1, scalar)))
+
        cv.cuda.absdiff(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.absdiff(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.compare(cuMat1, cuMat2, cv.CMP_GE).download(),
                                         cv.compare(npMat1, npMat2, cv.CMP_GE)))

+        self.assertTrue(np.allclose(cv.cuda.compareWithScalar(cuMat1, [scalar]*3, cv.CMP_GE).download(),
+                                         cv.compare(npMat1, scalar, cv.CMP_GE)))
+
        cuMatDst1 = cv.cuda_GpuMat(cuMat1.size(),cv.CV_8UC3)
        cv.cuda.compare(cuMat1, cuMat2, cv.CMP_GE, cuMatDst1)
        self.assertTrue(np.allclose(cuMatDst1.download(),cv.compare(npMat1, npMat2, cv.CMP_GE)))
@ -111,6 +130,7 @@ class cudaarithm_test(NewOpenCVTests):
    def test_logical(self):
        npMat1 = (np.random.random((128, 128)) * 255).astype(np.uint8)
        npMat2 = (np.random.random((128, 128)) * 255).astype(np.uint8)
+        scalar = np.random.random()

        cuMat1 = cv.cuda_GpuMat()
        cuMat2 = cv.cuda_GpuMat()
@ -121,18 +141,27 @@ class cudaarithm_test(NewOpenCVTests):
        self.assertTrue(np.allclose(cv.cuda.bitwise_or(cuMat1, cuMat2).download(),
                                         cv.bitwise_or(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.bitwise_or_with_scalar(cuMat1, scalar).download(),
+                                         cv.bitwise_or(npMat1, scalar)))
+
        cv.cuda.bitwise_or(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_or(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.bitwise_and(cuMat1, cuMat2).download(),
                                         cv.bitwise_and(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.bitwise_and_with_scalar(cuMat1, scalar).download(),
+                                         cv.bitwise_and(npMat1, scalar)))
+
        cv.cuda.bitwise_and(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_and(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.bitwise_xor(cuMat1, cuMat2).download(),
                                         cv.bitwise_xor(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.bitwise_xor_with_scalar(cuMat1, scalar).download(),
+                                         cv.bitwise_xor(npMat1, scalar)))
+
        cv.cuda.bitwise_xor(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_xor(npMat1, npMat2)))

@ -145,12 +174,18 @@ class cudaarithm_test(NewOpenCVTests):
        self.assertTrue(np.allclose(cv.cuda.min(cuMat1, cuMat2).download(),
                                         cv.min(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.minWithScalar(cuMat1, scalar).download(),
+                                         cv.min(npMat1, scalar)))
+
        cv.cuda.min(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.min(npMat1, npMat2)))

        self.assertTrue(np.allclose(cv.cuda.max(cuMat1, cuMat2).download(),
                                         cv.max(npMat1, npMat2)))

+        self.assertTrue(np.allclose(cv.cuda.maxWithScalar(cuMat1, scalar).download(),
+                                         cv.max(npMat1, scalar)))
+
        cv.cuda.max(cuMat1, cuMat2, cuMatDst)
        self.assertTrue(np.allclose(cuMatDst.download(),cv.max(npMat1, npMat2)))

--- a/modules/cudaarithm/src/arithm.cpp
+++ b/modules/cudaarithm/src/arithm.cpp
@ -54,6 +54,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, fl

 void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }

+Ptr<DFT> cv::cuda::createDFT(Size, int) { throw_no_cuda(); return Ptr<DFT>(); }
+
 Ptr<Convolution> cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr<Convolution>(); }

 #else /* !defined (HAVE_CUDA) */
--- a/modules/cudaarithm/src/cuda/polar_cart.cu
+++ b/modules/cudaarithm/src/cuda/polar_cart.cu
@ -289,9 +289,9 @@ namespace
        const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);

        if (mag.empty())
-            polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
+            polarToCartImpl_<T, false> <<<grid, block, 0, stream >>>(mag, angle, x, y, scale);
        else
-            polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
+            polarToCartImpl_<T, true> <<<grid, block, 0, stream >>>(mag, angle, x, y, scale);
    }

    template <typename T>
@ -305,9 +305,9 @@ namespace
        const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);

        if (mag.empty())
-            polarToCartDstInterleavedImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
+            polarToCartDstInterleavedImpl_<T, false> <<<grid, block, 0, stream >>>(mag, angle, xy, scale);
        else
-            polarToCartDstInterleavedImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
+            polarToCartDstInterleavedImpl_<T, true> <<<grid, block, 0, stream >>>(mag, angle, xy, scale);
    }

    template <typename T>
@ -320,7 +320,7 @@ namespace

        const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);

-        polarToCartInterleavedImpl_<T> << <grid, block, 0, stream >> >(magAngle, xy, scale);
+        polarToCartInterleavedImpl_<T> <<<grid, block, 0, stream >>>(magAngle, xy, scale);
    }
 }

--- a/modules/cudaarithm/src/element_operations.cpp
+++ b/modules/cudaarithm/src/element_operations.cpp
@ -84,8 +84,13 @@ void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_n
 void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::phase(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
 void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::cartToPolar(InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::cartToPolar(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
 void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::polarToCart(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }

 #else

--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@ -69,8 +69,10 @@ void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda();

 void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }

-void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
 void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, InputArray) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }

 void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }

--- a/modules/cudacodec/src/cuda/nv12_to_rgb.cu
+++ b/modules/cudacodec/src/cuda/nv12_to_rgb.cu
@ -179,9 +179,9 @@ void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int hei
    dim3 block(32, 8);
    dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
    if (videoFullRangeFlag)
-        NV12_to_BGRA<true> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
+        NV12_to_BGRA<true> <<<grid, block, 0, stream >>> (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
    else
-        NV12_to_BGRA<false> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
+        NV12_to_BGRA<false> <<<grid, block, 0, stream >>> (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
    CV_CUDEV_SAFE_CALL(cudaGetLastError());
    if (stream == 0)
        CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
--- a/modules/cudaimgproc/src/color.cpp
+++ b/modules/cudaimgproc/src/color.cpp
@ -51,7 +51,7 @@ void cv::cuda::cvtColor(InputArray, OutputArray, int, int, Stream&) { throw_no_c

 void cv::cuda::demosaicing(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }

-void cv::cuda::swapChannels(InputOutputArray, const int[], Stream&) { throw_no_cuda(); }
+void cv::cuda::swapChannels(InputOutputArray, const int[4], Stream&) { throw_no_cuda(); }

 void cv::cuda::gammaCorrection(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }

--- a/modules/cudaimgproc/src/connectedcomponents.cpp
+++ b/modules/cudaimgproc/src/connectedcomponents.cpp
@ -9,8 +9,8 @@ using namespace cv::cuda;

 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

-void cv::cuda::connectedComponents(InputArray img_, OutputArray labels_, int connectivity,
-    int ltype, ConnectedComponentsAlgorithmsTypes ccltype) { throw_no_cuda(); }
+void cv::cuda::connectedComponents(InputArray, OutputArray, int, int, ConnectedComponentsAlgorithmsTypes) { throw_no_cuda(); }
+void cv::cuda::connectedComponents(InputArray, OutputArray, int, int) { throw_no_cuda(); }

 #else /* !defined (HAVE_CUDA) */

--- a/modules/cudaimgproc/src/cuda/canny.cu
+++ b/modules/cudaimgproc/src/cuda/canny.cu
@ -428,7 +428,7 @@ namespace canny
            cudaSafeCall( cudaMemsetAsync(d_counter, 0, sizeof(int), stream) );

            const dim3 block(128);
-            const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
+            const dim3 grid(std::min(count, 65535), divUp(count, 65535), 1);

            edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, d_counter, count);
            cudaSafeCall( cudaGetLastError() );
@ -439,7 +439,7 @@ namespace canny
            cudaSafeCall( cudaMemcpyAsync(&count, d_counter, sizeof(int), cudaMemcpyDeviceToHost, stream) );
            cudaSafeCall( cudaStreamSynchronize(stream) );

-            count = min(count, map.cols * map.rows);
+            count = std::min(count, map.cols * map.rows);

            //std::swap(st1, st2);
            short2* tmp = st1;
--- a/modules/cudaimgproc/src/cuda/connectedcomponents.cu
+++ b/modules/cudaimgproc/src/cuda/connectedcomponents.cu
@ -317,19 +317,19 @@ void BlockBasedKomuraEquivalence(const cv::cuda::GpuMat& img, cv::cuda::GpuMat&
    grid_size = dim3((((img.cols + 1) / 2) - 1) / kblock_cols + 1, (((img.rows + 1) / 2) - 1) / kblock_rows + 1, 1);
    block_size = dim3(kblock_cols, kblock_rows, 1);

-    InitLabeling << <grid_size, block_size >> > (img, labels, last_pixel);
+    InitLabeling <<<grid_size, block_size >>> (img, labels, last_pixel);
    cudaSafeCall(cudaGetLastError());

-    Compression << <grid_size, block_size >> > (labels);
+    Compression <<<grid_size, block_size >>> (labels);
    cudaSafeCall(cudaGetLastError());

-    Merge << <grid_size, block_size >> > (labels, last_pixel);
+    Merge <<<grid_size, block_size >>> (labels, last_pixel);
    cudaSafeCall(cudaGetLastError());

-    Compression << <grid_size, block_size >> > (labels);
+    Compression <<<grid_size, block_size >>> (labels);
    cudaSafeCall(cudaGetLastError());

-    FinalLabeling << <grid_size, block_size >> > (img, labels);
+    FinalLabeling <<<grid_size, block_size >>> (img, labels);
    cudaSafeCall(cudaGetLastError());

    if (last_pixel_allocated) {
--- a/modules/cudaimgproc/src/cuda/generalized_hough.cu
+++ b/modules/cudaimgproc/src/cuda/generalized_hough.cu
@ -302,7 +302,7 @@ namespace cv { namespace cuda { namespace device
            int totalCount;
            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );

-            totalCount = ::min(totalCount, maxSize);
+            totalCount = std::min(totalCount, maxSize);

            return totalCount;
        }
@ -812,7 +812,7 @@ namespace cv { namespace cuda { namespace device
            int totalCount;
            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );

-            totalCount = ::min(totalCount, maxSize);
+            totalCount = std::min(totalCount, maxSize);

            return totalCount;
        }
--- a/modules/cudaimgproc/src/cuda/hough_circles.cu
+++ b/modules/cudaimgproc/src/cuda/hough_circles.cu
@@ -238,7 +238,7 @@ namespace cv { namespace cuda { namespace device
            cudaSafeCall( cudaMemcpyAsync(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
            cudaSafeCall( cudaStreamSynchronize(stream) );

-            totalCount = ::min(totalCount, maxCircles);
+            totalCount = std::min(totalCount, maxCircles);

            return totalCount;
        }
--- a/modules/cudaimgproc/src/cuda/hough_lines.cu
+++ b/modules/cudaimgproc/src/cuda/hough_lines.cu
@@ -189,7 +189,7 @@ namespace cv { namespace cuda { namespace device

            cudaSafeCall( cudaStreamSynchronize(stream) );

-            totalCount = ::min(totalCount, maxSize);
+            totalCount = std::min(totalCount, maxSize);

            if (doSort && totalCount > 0)
            {
--- a/modules/cudaimgproc/src/cuda/hough_segments.cu
+++ b/modules/cudaimgproc/src/cuda/hough_segments.cu
@@ -241,7 +241,7 @@ namespace cv { namespace cuda { namespace device

            cudaSafeCall( cudaStreamSynchronize(stream) );

-            totalCount = ::min(totalCount, maxSize);
+            totalCount = std::min(totalCount, maxSize);
            return totalCount;
        }
    }
--- a/modules/cudaimgproc/src/cuda/moments.cu
+++ b/modules/cudaimgproc/src/cuda/moments.cu
@@ -139,7 +139,7 @@ template <typename TSrc, typename TMoments, int nMoments> struct momentsDispatch
    static void call(const PtrStepSz<TSrc> src, PtrStepSz<TMoments> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
        dim3 blockSize(blockSizeX, blockSizeY);
        dim3 gridSize = dim3(divUp(src.rows, blockSizeY));
-        spatialMoments<TSrc, TMoments, false, false, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr());
+        spatialMoments<TSrc, TMoments, false, false, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr());
        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
    };
@@ -150,9 +150,9 @@ template <typename TSrc, int nMoments> struct momentsDispatcherChar {
        dim3 blockSize(blockSizeX, blockSizeY);
        dim3 gridSize = dim3(divUp(src.rows, blockSizeY));
        if (offsetX)
-            spatialMoments<TSrc, float, true, false, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr(), offsetX);
+            spatialMoments<TSrc, float, true, false, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr(), offsetX);
        else
-            spatialMoments<TSrc, float, true, true, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr());
+            spatialMoments<TSrc, float, true, true, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr());

        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
--- a/modules/cudaimgproc/src/histogram.cpp
+++ b/modules/cudaimgproc/src/histogram.cpp
@@ -48,6 +48,7 @@ using namespace cv::cuda;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

 void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::calcHist(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

 void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }

--- a/modules/cudaimgproc/src/moments.cpp
+++ b/modules/cudaimgproc/src/moments.cpp
@@ -3,15 +3,10 @@
 // of this distribution and at http://opencv.org/license.html.

 #include "precomp.hpp"
-#include "cuda/moments.cuh"

 using namespace cv;
 using namespace cv::cuda;

-int cv::cuda::numMoments(const MomentsOrder order) {
-    return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123;
-}
-
 template<typename T>
 cv::Moments convertSpatialMomentsT(Mat spatialMoments, const MomentsOrder order) {
    switch (order) {
@@ -32,10 +27,17 @@ cv::Moments cv::cuda::convertSpatialMoments(Mat spatialMoments, const MomentsOrd
 }

 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+    int cv::cuda::numMoments(MomentsOrder) { throw_no_cuda(); return 0; }
    Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); }
-    void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); }
+    void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); }
 #else /* !defined (HAVE_CUDA) */

+#include "cuda/moments.cuh"
+
+int cv::cuda::numMoments(const MomentsOrder order) {
+    return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123;
+}
+
 namespace cv { namespace cuda { namespace device { namespace imgproc {
        template <typename TSrc, typename TMoments>
        void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);
--- a/modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu
+++ b/modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu
@@ -90,7 +90,7 @@ void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint3

        dim3 blockDim(BLOCKDIM_X, BLOCKDIM_Y);
        dim3 gridDim((nDstWidth + blockDim.x - 1) / blockDim.x, (nDstHeight + blockDim.y - 1) / blockDim.y);
-        NearestNeighborFlowKernel << <gridDim, blockDim >> > (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
+        NearestNeighborFlowKernel <<<gridDim, blockDim >>> (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
            0, dstDevPtr, nDstWidth, nDstPitch, nDstHeight,
            nScaleFactor);

--- a/modules/cudaoptflow/src/farneback.cpp
+++ b/modules/cudaoptflow/src/farneback.cpp
@@ -47,7 +47,7 @@ using namespace cv::cuda;

 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)

-Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }
+Ptr<cv::cuda::FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }

 #else

--- a/modules/cudaoptflow/src/precomp.hpp
+++ b/modules/cudaoptflow/src/precomp.hpp
@@ -52,7 +52,9 @@
 #include "opencv2/video.hpp"

 #include "opencv2/core/private.cuda.hpp"
+#if defined HAVE_CUDA
 #include "opencv2/core/cuda/vec_traits.hpp"
+#endif
 #include "opencv2/opencv_modules.hpp"

 #ifdef HAVE_OPENCV_CUDALEGACY
--- a/modules/cudev/include/opencv2/cudev/grid/detail/minmaxloc.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/minmaxloc.hpp
@@ -148,8 +148,8 @@ namespace grid_minmaxloc_detail
        block = dim3(Policy::block_size_x, Policy::block_size_y);
        grid = dim3(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));

-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
+        grid.x = std::min(grid.x, block.x);
+        grid.y = std::min(grid.y, block.y);
    }

    template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
--- a/modules/hfs/src/cuda/gslic_seg_engine_gpu.cu
+++ b/modules/hfs/src/cuda/gslic_seg_engine_gpu.cu
@@ -75,7 +75,7 @@ void SegEngineGPU::cvtImgSpace(Ptr<UChar4Image> inimg, Ptr<Float4Image> outimg)

    dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
    dim3 gridSize = getGridSize(img_size, blockSize);
-    cvtImgSpaceDevice << <gridSize, blockSize >> >(inimg_ptr, img_size, outimg_ptr);
+    cvtImgSpaceDevice <<<gridSize, blockSize >>>(inimg_ptr, img_size, outimg_ptr);
 }

 void SegEngineGPU::initClusterCenters()
@@ -85,7 +85,7 @@ void SegEngineGPU::initClusterCenters()

    dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
    dim3 gridSize = getGridSize(map_size, blockSize);
-    initClusterCentersDevice << <gridSize, blockSize >> >
+    initClusterCentersDevice <<<gridSize, blockSize >>>
        (img_ptr, map_size, img_size, spixel_size, spixel_list);
 }

@@ -98,7 +98,7 @@ void SegEngineGPU::findCenterAssociation()
    dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
    dim3 gridSize = getGridSize(img_size, blockSize);

-    findCenterAssociationDevice << <gridSize, blockSize >> >
+    findCenterAssociationDevice <<<gridSize, blockSize >>>
        (img_ptr, spixel_list, map_size, img_size,
            spixel_size, slic_settings.coh_weight,
            max_xy_dist, max_color_dist, idx_ptr);
@@ -116,13 +116,13 @@ void SegEngineGPU::updateClusterCenter()
    dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
    dim3 gridSize(map_size.x, map_size.y, no_grid_per_center);

-    updateClusterCenterDevice << <gridSize, blockSize >> >
+    updateClusterCenterDevice <<<gridSize, blockSize >>>
        (img_ptr, idx_ptr, map_size, img_size,
            spixel_size, no_blocks_per_line, accum_map_ptr);

    dim3 gridSize2(map_size.x, map_size.y);

-    finalizeReductionResultDevice << <gridSize2, blockSize >> >
+    finalizeReductionResultDevice <<<gridSize2, blockSize >>>
        (accum_map_ptr, map_size, no_grid_per_center, spixel_list_ptr);
 }

@@ -134,13 +134,13 @@ void SegEngineGPU::enforceConnectivity()
    dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
    dim3 gridSize = getGridSize(img_size, blockSize);

-    enforceConnectivityDevice << <gridSize, blockSize >> >
+    enforceConnectivityDevice <<<gridSize, blockSize >>>
        (idx_ptr, img_size, tmp_idx_ptr);
-    enforceConnectivityDevice << <gridSize, blockSize >> >
+    enforceConnectivityDevice <<<gridSize, blockSize >>>
        (tmp_idx_ptr, img_size, idx_ptr);
-    enforceConnectivityDevice1_2 << <gridSize, blockSize >> >
+    enforceConnectivityDevice1_2 <<<gridSize, blockSize >>>
        (idx_ptr, img_size, tmp_idx_ptr);
-    enforceConnectivityDevice1_2 << <gridSize, blockSize >> >
+    enforceConnectivityDevice1_2 <<<gridSize, blockSize >>>
        (tmp_idx_ptr, img_size, idx_ptr);
 }

--- a/modules/hfs/src/cuda/magnitude.cu
+++ b/modules/hfs/src/cuda/magnitude.cu
@@ -194,7 +194,7 @@ void Magnitude::derrivativeXYGpu()
    dim3 gridSize((int)ceil((float)img_size.x / (float)blockSize.x),
        (int)ceil((float)img_size.y / (float)blockSize.y));

-    derrivativeXYDevice << <gridSize, blockSize >> >
+    derrivativeXYDevice <<<gridSize, blockSize >>>
        (gray_ptr, dx_ptr, dy_ptr, mag_ptr, img_size);
 }

@@ -209,7 +209,7 @@ void Magnitude::nonMaxSuppGpu()
    dim3 gridSize((int)ceil((float)img_size.x / (float)blockSize.x),
        (int)ceil((float)img_size.y / (float)blockSize.y));

-    nonMaxSuppDevice << <gridSize, blockSize >> >
+    nonMaxSuppDevice <<<gridSize, blockSize >>>
        (nms_ptr, dx_ptr, dy_ptr, mag_ptr, img_size);
 }