Merge branch 4.x

pull/3819/head
Alexander Smorkalov 4 months ago
commit bd3b734731
  1. 230
      modules/cudaarithm/include/opencv2/cudaarithm.hpp
  2. 35
      modules/cudaarithm/misc/python/test/test_cudaarithm.py
  3. 2
      modules/cudaarithm/src/arithm.cpp
  4. 10
      modules/cudaarithm/src/cuda/polar_cart.cu
  5. 5
      modules/cudaarithm/src/element_operations.cpp
  6. 4
      modules/cudaarithm/src/reductions.cpp
  7. 4
      modules/cudacodec/src/cuda/nv12_to_rgb.cu
  8. 2
      modules/cudaimgproc/src/color.cpp
  9. 4
      modules/cudaimgproc/src/connectedcomponents.cpp
  10. 4
      modules/cudaimgproc/src/cuda/canny.cu
  11. 10
      modules/cudaimgproc/src/cuda/connectedcomponents.cu
  12. 4
      modules/cudaimgproc/src/cuda/generalized_hough.cu
  13. 2
      modules/cudaimgproc/src/cuda/hough_circles.cu
  14. 2
      modules/cudaimgproc/src/cuda/hough_lines.cu
  15. 2
      modules/cudaimgproc/src/cuda/hough_segments.cu
  16. 6
      modules/cudaimgproc/src/cuda/moments.cu
  17. 1
      modules/cudaimgproc/src/histogram.cpp
  18. 14
      modules/cudaimgproc/src/moments.cpp
  19. 2
      modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu
  20. 2
      modules/cudaoptflow/src/farneback.cpp
  21. 2
      modules/cudaoptflow/src/precomp.hpp
  22. 4
      modules/cudev/include/opencv2/cudev/grid/detail/minmaxloc.hpp
  23. 18
      modules/hfs/src/cuda/gslic_seg_engine_gpu.cu
  24. 4
      modules/hfs/src/cuda/magnitude.cu

@ -75,61 +75,137 @@ namespace cv { namespace cuda {
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 .
@param dst Destination matrix that has the same size and number of channels as the input array(s).
The depth is defined by dtype or src1 depth.
The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa add
@warning In python both @p src1 and @p src2 have to be matrices, see @ref addWithScalar for scalar overload.
@sa cv::add, addWithScalar
*/
CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null());
/** @brief Computes a matrix-scalar sum.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and number of channels as the input array.
The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa add
*/
CV_EXPORTS_W void inline addWithScalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) {
// Separately named scalar form of add(): the add() documentation notes that Python callers
// must pass matrices to add() and points here for scalars. All arguments are forwarded unchanged.
add(src1, src2, dst, mask, dtype, stream);
}
/** @brief Computes a matrix-matrix or matrix-scalar difference.
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 .
@param src2 Second source matrix or scalar. Matrix should have the same size and type as @p src1.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
The depth is defined by dtype or src1 depth.
The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa subtract
@warning In python both @p src1 and @p src2 have to be matrices, see @ref subtractWithScalar for scalar overload.
@sa cv::subtract, subtractWithScalar
*/
CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null());
/** @brief Computes a matrix-scalar difference.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and number of channels as the input array.
The depth is defined by dtype or @p src1 depth.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa cv::subtract
*/
CV_EXPORTS_W void inline subtractWithScalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) {
// Separately named scalar form of subtract(): the subtract() documentation notes that Python
// callers must pass matrices to subtract() and points here for scalars. Arguments are forwarded unchanged.
subtract(src1, src2, dst, mask, dtype, stream);
}
/** @brief Computes a matrix-matrix or matrix-scalar per-element product.
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
The depth is defined by dtype or src1 depth.
The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa multiply
@warning In python both @p src1 and @p src2 have to be matrices, see @ref multiplyWithScalar for scalar overload.
@sa cv::multiply, multiplyWithScalar
*/
CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
/** @brief Computes a matrix-scalar per-element product.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and number of channels as the input array.
The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
@sa multiply
*/
CV_EXPORTS_W void inline multiplyWithScalar(InputArray src1, Scalar src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) {
// Separately named scalar form of multiply(): the multiply() documentation notes that Python
// callers must pass matrices to multiply() and points here for scalars. Arguments are forwarded unchanged.
multiply(src1, src2, dst, scale, dtype, stream);
}
/** @brief Computes a matrix-matrix or matrix-scalar division.
@param src1 First source matrix or a scalar.
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
@param dst Destination matrix that has the same size and number of channels as the input array(s).
The depth is defined by dtype or src1 depth.
The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
This function, in contrast to cv::divide, uses a round-down rounding mode.
@sa divide
@warning In python both @p src1 and @p src2 have to be matrices, see @ref divideWithScalar for scalar overload.
@sa cv::divide, divideWithScalar
*/
CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
/** @brief Computes a matrix-scalar division.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and number of channels as the input array.
The depth is defined by dtype or @p src1 depth.
@param scale Optional scale factor.
@param dtype Optional depth of the output array.
@param stream Stream for the asynchronous version.
This function, in contrast to cv::divide, uses a round-down rounding mode.
@sa divide
*/
CV_EXPORTS_W void inline divideWithScalar(InputArray src1, Scalar src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) {
// Separately named scalar form of divide(): the divide() documentation notes that Python
// callers must pass matrices to divide() and points here for scalars. Arguments are forwarded unchanged.
divide(src1, src2, dst, scale, dtype, stream);
}
/** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar).
@param src1 First source matrix or scalar.
@ -137,10 +213,25 @@ CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, doub
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.
@sa absdiff
@warning In python both @p src1 and @p src2 have to be matrices, see @ref absdiffWithScalar for scalar overload.
@sa cv::absdiff, absdiffWithScalar
*/
CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());
/** @brief Computes per-element absolute difference of a matrix and scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param stream Stream for the asynchronous version.
@sa absdiff
*/
CV_EXPORTS_W void inline absdiffWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
// Separately named scalar form of absdiff(): the absdiff() documentation notes that Python
// callers must pass matrices to absdiff() and points here for scalars. Arguments are forwarded unchanged.
absdiff(src1, src2, dst, stream);
}
/** @brief Computes an absolute value of each matrix element.
@param src Source matrix.
@ -218,10 +309,32 @@ CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst, Stream& str
- **CMP_NE:** a(.) != b(.)
@param stream Stream for the asynchronous version.
@sa compare
@warning In python both @p src1 and @p src2 have to be matrices, see @ref compareWithScalar for scalar overload.
@sa cv::compare, compareWithScalar
*/
CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null());
/** @brief Compares elements of a matrix and scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size as the input array and type \ref CV_8U.
@param cmpop Flag specifying the relation between the elements to be checked:
- **CMP_EQ:** a(.) == b(.)
- **CMP_GT:** a(.) \> b(.)
- **CMP_GE:** a(.) \>= b(.)
- **CMP_LT:** a(.) \< b(.)
- **CMP_LE:** a(.) \<= b(.)
- **CMP_NE:** a(.) != b(.)
@param stream Stream for the asynchronous version.
@sa compare
*/
CV_EXPORTS_W void inline compareWithScalar(InputArray src1, Scalar src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()) {
// Separately named scalar form of compare(): the compare() documentation notes that Python
// callers must pass matrices to compare() and points here for scalars. Arguments are forwarded unchanged.
compare(src1, src2, dst, cmpop, stream);
}
/** @brief Performs a per-element bitwise inversion.
@param src Source matrix.
@ -240,9 +353,28 @@ CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, InputArray mask =
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@warning In python both @p src1 and @p src2 have to be matrices, see @ref bitwise_or_with_scalar for scalar overload.
@sa cv::bitwise_or, bitwise_or_with_scalar
*/
CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
/** @brief Performs a per-element bitwise disjunction of a matrix and scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@sa bitwise_or
*/
CV_EXPORTS_W void inline bitwise_or_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
// Separately named scalar form of bitwise_or(): the bitwise_or() documentation notes that Python
// callers must pass matrices to bitwise_or() and points here for scalars. Arguments are forwarded unchanged.
bitwise_or(src1, src2, dst, mask, stream);
}
/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar).
@param src1 First source matrix or scalar.
@ -251,20 +383,58 @@ CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst,
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@warning In python both @p src1 and @p src2 have to be matrices, see @ref bitwise_and_with_scalar for scalar overload.
@sa cv::bitwise_and, bitwise_and_with_scalar
*/
CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
/** @brief Performs a per-element bitwise conjunction of a matrix and a scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@sa bitwise_and
*/
CV_EXPORTS_W void inline bitwise_and_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
// Separately named scalar form of bitwise_and(): the bitwise_and() documentation notes that Python
// callers must pass matrices to bitwise_and() and points here for scalars. Arguments are forwarded unchanged.
bitwise_and(src1, src2, dst, mask, stream);
}
/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and scalar).
@param src1 First source matrix or scalar.
@param src2 Second source matrix or scalar.
@param dst Destination matrix that has the same size and type as the input array(s).
@param dst Destination matrix that has the same size and type as the input array.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@warning In python both @p src1 and @p src2 have to be matrices, see @ref bitwise_xor_with_scalar for scalar overload.
@sa cv::bitwise_xor, bitwise_xor_with_scalar
*/
CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
/** @brief Performs a per-element bitwise exclusive or operation of a matrix and a scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the
destination array to be changed. The mask can be used only with single channel images.
@param stream Stream for the asynchronous version.
@sa bitwise_xor
*/
CV_EXPORTS_W void inline bitwise_xor_with_scalar(InputArray src1, Scalar src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) {
// Separately named scalar form of bitwise_xor(): the bitwise_xor() documentation notes that Python
// callers must pass matrices to bitwise_xor() and points here for scalars. Arguments are forwarded unchanged.
bitwise_xor(src1, src2, dst, mask, stream);
}
/** @brief Performs pixel by pixel right shift of an image by a constant value.
@param src Source matrix. Supports 1, 3 and 4 channels images with integers elements.
@ -299,10 +469,25 @@ CV_WRAP inline void lshift(InputArray src, Scalar val, OutputArray dst, Stream&
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.
@sa min
@warning In python both @p src1 and @p src2 have to be matrices, see @ref minWithScalar for scalar overload.
@sa cv::min, minWithScalar
*/
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());
/** @brief Computes the per-element minimum of a matrix and a scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param stream Stream for the asynchronous version.
@sa min
*/
CV_EXPORTS_W void inline minWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
// Separately named scalar form of min(): the min() documentation notes that Python
// callers must pass matrices to min() and points here for scalars. Arguments are forwarded unchanged.
min(src1, src2, dst, stream);
}
/** @brief Computes the per-element maximum of two matrices (or a matrix and a scalar).
@param src1 First source matrix or scalar.
@ -310,10 +495,25 @@ CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream&
@param dst Destination matrix that has the same size and type as the input array(s).
@param stream Stream for the asynchronous version.
@sa max
@warning In python both @p src1 and @p src2 have to be matrices, see @ref maxWithScalar for scalar overload.
@sa cv::max, maxWithScalar
*/
CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null());
/** @brief Computes the per-element maximum of a matrix and a scalar.
@param src1 First source matrix.
@param src2 Second source scalar.
@param dst Destination matrix that has the same size and type as the input array.
@param stream Stream for the asynchronous version.
@sa max
*/
CV_EXPORTS_W void inline maxWithScalar(InputArray src1, Scalar src2, OutputArray dst, Stream& stream = Stream::Null()) {
// Separately named scalar form of max(): the max() documentation notes that Python
// callers must pass matrices to max() and points here for scalars. Arguments are forwarded unchanged.
max(src1, src2, dst, stream);
}
/** @brief Computes the weighted sum of two arrays.
@param src1 First source array.

@ -38,6 +38,7 @@ class cudaarithm_test(NewOpenCVTests):
def test_arithmetic(self):
npMat1 = np.random.random((128, 128, 3)) - 0.5
npMat2 = np.random.random((128, 128, 3)) - 0.5
scalar = np.random.random()
cuMat1 = cv.cuda_GpuMat()
cuMat2 = cv.cuda_GpuMat()
@ -48,36 +49,54 @@ class cudaarithm_test(NewOpenCVTests):
self.assertTrue(np.allclose(cv.cuda.add(cuMat1, cuMat2).download(),
cv.add(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.addWithScalar(cuMat1, [scalar]*3).download(),
cv.add(npMat1, scalar)))
cv.cuda.add(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.add(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.subtract(cuMat1, cuMat2).download(),
cv.subtract(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.subtractWithScalar(cuMat1, [scalar]*3).download(),
cv.subtract(npMat1, scalar)))
cv.cuda.subtract(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.subtract(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.multiply(cuMat1, cuMat2).download(),
cv.multiply(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.multiplyWithScalar(cuMat1, [scalar]*3).download(),
cv.multiply(npMat1, scalar)))
cv.cuda.multiply(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.multiply(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.divide(cuMat1, cuMat2).download(),
cv.divide(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.divideWithScalar(cuMat1, [scalar]*3).download(),
cv.divide(npMat1, scalar)))
cv.cuda.divide(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.divide(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.absdiff(cuMat1, cuMat2).download(),
cv.absdiff(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.absdiffWithScalar(cuMat1, [scalar]*3).download(),
cv.absdiff(npMat1, scalar)))
cv.cuda.absdiff(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.absdiff(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.compare(cuMat1, cuMat2, cv.CMP_GE).download(),
cv.compare(npMat1, npMat2, cv.CMP_GE)))
self.assertTrue(np.allclose(cv.cuda.compareWithScalar(cuMat1, [scalar]*3, cv.CMP_GE).download(),
cv.compare(npMat1, scalar, cv.CMP_GE)))
cuMatDst1 = cv.cuda_GpuMat(cuMat1.size(),cv.CV_8UC3)
cv.cuda.compare(cuMat1, cuMat2, cv.CMP_GE, cuMatDst1)
self.assertTrue(np.allclose(cuMatDst1.download(),cv.compare(npMat1, npMat2, cv.CMP_GE)))
@ -111,6 +130,7 @@ class cudaarithm_test(NewOpenCVTests):
def test_logical(self):
npMat1 = (np.random.random((128, 128)) * 255).astype(np.uint8)
npMat2 = (np.random.random((128, 128)) * 255).astype(np.uint8)
scalar = np.random.random()
cuMat1 = cv.cuda_GpuMat()
cuMat2 = cv.cuda_GpuMat()
@ -121,18 +141,27 @@ class cudaarithm_test(NewOpenCVTests):
self.assertTrue(np.allclose(cv.cuda.bitwise_or(cuMat1, cuMat2).download(),
cv.bitwise_or(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.bitwise_or_with_scalar(cuMat1, scalar).download(),
cv.bitwise_or(npMat1, scalar)))
cv.cuda.bitwise_or(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_or(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.bitwise_and(cuMat1, cuMat2).download(),
cv.bitwise_and(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.bitwise_and_with_scalar(cuMat1, scalar).download(),
cv.bitwise_and(npMat1, scalar)))
cv.cuda.bitwise_and(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_and(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.bitwise_xor(cuMat1, cuMat2).download(),
cv.bitwise_xor(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.bitwise_xor_with_scalar(cuMat1, scalar).download(),
cv.bitwise_xor(npMat1, scalar)))
cv.cuda.bitwise_xor(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.bitwise_xor(npMat1, npMat2)))
@ -145,12 +174,18 @@ class cudaarithm_test(NewOpenCVTests):
self.assertTrue(np.allclose(cv.cuda.min(cuMat1, cuMat2).download(),
cv.min(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.minWithScalar(cuMat1, scalar).download(),
cv.min(npMat1, scalar)))
cv.cuda.min(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.min(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.max(cuMat1, cuMat2).download(),
cv.max(npMat1, npMat2)))
self.assertTrue(np.allclose(cv.cuda.maxWithScalar(cuMat1, scalar).download(),
cv.max(npMat1, scalar)))
cv.cuda.max(cuMat1, cuMat2, cuMatDst)
self.assertTrue(np.allclose(cuMatDst.download(),cv.max(npMat1, npMat2)))

@ -54,6 +54,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, fl
void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }
Ptr<DFT> cv::cuda::createDFT(Size, int) { throw_no_cuda(); return Ptr<DFT>(); }
Ptr<Convolution> cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr<Convolution>(); }
#else /* !defined (HAVE_CUDA) */

@ -289,9 +289,9 @@ namespace
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
if (mag.empty())
polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
polarToCartImpl_<T, false> <<<grid, block, 0, stream >>>(mag, angle, x, y, scale);
else
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
polarToCartImpl_<T, true> <<<grid, block, 0, stream >>>(mag, angle, x, y, scale);
}
template <typename T>
@ -305,9 +305,9 @@ namespace
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
if (mag.empty())
polarToCartDstInterleavedImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
polarToCartDstInterleavedImpl_<T, false> <<<grid, block, 0, stream >>>(mag, angle, xy, scale);
else
polarToCartDstInterleavedImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
polarToCartDstInterleavedImpl_<T, true> <<<grid, block, 0, stream >>>(mag, angle, xy, scale);
}
template <typename T>
@ -320,7 +320,7 @@ namespace
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
polarToCartInterleavedImpl_<T> << <grid, block, 0, stream >> >(magAngle, xy, scale);
polarToCartInterleavedImpl_<T> <<<grid, block, 0, stream >>>(magAngle, xy, scale);
}
}

@ -84,8 +84,13 @@ void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_n
void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::phase(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
#else

@ -69,8 +69,10 @@ void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda();
void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
void cv::cuda::meanStdDev(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, InputArray) { throw_no_cuda(); }
void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }

@ -179,9 +179,9 @@ void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int hei
dim3 block(32, 8);
dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
if (videoFullRangeFlag)
NV12_to_BGRA<true> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
NV12_to_BGRA<true> <<<grid, block, 0, stream >>> (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
else
NV12_to_BGRA<false> << <grid, block, 0, stream >> > (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
NV12_to_BGRA<false> <<<grid, block, 0, stream >>> (decodedFrame.ptr<uchar>(), decodedFrame.step, outFrame.ptr<uint>(), outFrame.step, width, height);
CV_CUDEV_SAFE_CALL(cudaGetLastError());
if (stream == 0)
CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());

@ -51,7 +51,7 @@ void cv::cuda::cvtColor(InputArray, OutputArray, int, int, Stream&) { throw_no_c
void cv::cuda::demosaicing(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::swapChannels(InputOutputArray, const int[], Stream&) { throw_no_cuda(); }
void cv::cuda::swapChannels(InputOutputArray, const int[4], Stream&) { throw_no_cuda(); }
void cv::cuda::gammaCorrection(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }

@ -9,8 +9,8 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::connectedComponents(InputArray img_, OutputArray labels_, int connectivity,
int ltype, ConnectedComponentsAlgorithmsTypes ccltype) { throw_no_cuda(); }
void cv::cuda::connectedComponents(InputArray, OutputArray, int, int, ConnectedComponentsAlgorithmsTypes) { throw_no_cuda(); }
void cv::cuda::connectedComponents(InputArray, OutputArray, int, int) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */

@ -428,7 +428,7 @@ namespace canny
cudaSafeCall( cudaMemsetAsync(d_counter, 0, sizeof(int), stream) );
const dim3 block(128);
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
const dim3 grid(std::min(count, 65535), divUp(count, 65535), 1);
edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, d_counter, count);
cudaSafeCall( cudaGetLastError() );
@ -439,7 +439,7 @@ namespace canny
cudaSafeCall( cudaMemcpyAsync(&count, d_counter, sizeof(int), cudaMemcpyDeviceToHost, stream) );
cudaSafeCall( cudaStreamSynchronize(stream) );
count = min(count, map.cols * map.rows);
count = std::min(count, map.cols * map.rows);
//std::swap(st1, st2);
short2* tmp = st1;

@ -317,19 +317,19 @@ void BlockBasedKomuraEquivalence(const cv::cuda::GpuMat& img, cv::cuda::GpuMat&
grid_size = dim3((((img.cols + 1) / 2) - 1) / kblock_cols + 1, (((img.rows + 1) / 2) - 1) / kblock_rows + 1, 1);
block_size = dim3(kblock_cols, kblock_rows, 1);
InitLabeling << <grid_size, block_size >> > (img, labels, last_pixel);
InitLabeling <<<grid_size, block_size >>> (img, labels, last_pixel);
cudaSafeCall(cudaGetLastError());
Compression << <grid_size, block_size >> > (labels);
Compression <<<grid_size, block_size >>> (labels);
cudaSafeCall(cudaGetLastError());
Merge << <grid_size, block_size >> > (labels, last_pixel);
Merge <<<grid_size, block_size >>> (labels, last_pixel);
cudaSafeCall(cudaGetLastError());
Compression << <grid_size, block_size >> > (labels);
Compression <<<grid_size, block_size >>> (labels);
cudaSafeCall(cudaGetLastError());
FinalLabeling << <grid_size, block_size >> > (img, labels);
FinalLabeling <<<grid_size, block_size >>> (img, labels);
cudaSafeCall(cudaGetLastError());
if (last_pixel_allocated) {

@ -302,7 +302,7 @@ namespace cv { namespace cuda { namespace device
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
totalCount = std::min(totalCount, maxSize);
return totalCount;
}
@ -812,7 +812,7 @@ namespace cv { namespace cuda { namespace device
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
totalCount = std::min(totalCount, maxSize);
return totalCount;
}

@ -238,7 +238,7 @@ namespace cv { namespace cuda { namespace device
cudaSafeCall( cudaMemcpyAsync(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
cudaSafeCall( cudaStreamSynchronize(stream) );
totalCount = ::min(totalCount, maxCircles);
totalCount = std::min(totalCount, maxCircles);
return totalCount;
}

@ -189,7 +189,7 @@ namespace cv { namespace cuda { namespace device
cudaSafeCall( cudaStreamSynchronize(stream) );
totalCount = ::min(totalCount, maxSize);
totalCount = std::min(totalCount, maxSize);
if (doSort && totalCount > 0)
{

@ -241,7 +241,7 @@ namespace cv { namespace cuda { namespace device
cudaSafeCall( cudaStreamSynchronize(stream) );
totalCount = ::min(totalCount, maxSize);
totalCount = std::min(totalCount, maxSize);
return totalCount;
}
}

@ -139,7 +139,7 @@ template <typename TSrc, typename TMoments, int nMoments> struct momentsDispatch
static void call(const PtrStepSz<TSrc> src, PtrStepSz<TMoments> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
dim3 blockSize(blockSizeX, blockSizeY);
dim3 gridSize = dim3(divUp(src.rows, blockSizeY));
spatialMoments<TSrc, TMoments, false, false, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr());
spatialMoments<TSrc, TMoments, false, false, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr());
if (stream == 0)
cudaSafeCall(cudaStreamSynchronize(stream));
};
@ -150,9 +150,9 @@ template <typename TSrc, int nMoments> struct momentsDispatcherChar {
dim3 blockSize(blockSizeX, blockSizeY);
dim3 gridSize = dim3(divUp(src.rows, blockSizeY));
if (offsetX)
spatialMoments<TSrc, float, true, false, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr(), offsetX);
spatialMoments<TSrc, float, true, false, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr(), offsetX);
else
spatialMoments<TSrc, float, true, true, nMoments> << <gridSize, blockSize, 0, stream >> > (src, binary, moments.ptr());
spatialMoments<TSrc, float, true, true, nMoments> <<<gridSize, blockSize, 0, stream >>> (src, binary, moments.ptr());
if (stream == 0)
cudaSafeCall(cudaStreamSynchronize(stream));

@ -48,6 +48,7 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::calcHist(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }

@ -3,15 +3,10 @@
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "cuda/moments.cuh"
using namespace cv;
using namespace cv::cuda;
int cv::cuda::numMoments(const MomentsOrder order) {
return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123;
}
template<typename T>
cv::Moments convertSpatialMomentsT(Mat spatialMoments, const MomentsOrder order) {
switch (order) {
@ -32,10 +27,17 @@ cv::Moments cv::cuda::convertSpatialMoments(Mat spatialMoments, const MomentsOrd
}
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
int cv::cuda::numMoments(MomentsOrder) { throw_no_cuda(); return 0; }
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); }
void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); }
void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
#include "cuda/moments.cuh"
int cv::cuda::numMoments(const MomentsOrder order) {
return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123;
}
namespace cv { namespace cuda { namespace device { namespace imgproc {
template <typename TSrc, typename TMoments>
void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);

@ -90,7 +90,7 @@ void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint3
dim3 blockDim(BLOCKDIM_X, BLOCKDIM_Y);
dim3 gridDim((nDstWidth + blockDim.x - 1) / blockDim.x, (nDstHeight + blockDim.y - 1) / blockDim.y);
NearestNeighborFlowKernel << <gridDim, blockDim >> > (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
NearestNeighborFlowKernel <<<gridDim, blockDim >>> (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
0, dstDevPtr, nDstWidth, nDstPitch, nDstHeight,
nScaleFactor);

@ -47,7 +47,7 @@ using namespace cv::cuda;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }
Ptr<cv::cuda::FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }
#else

@ -52,7 +52,9 @@
#include "opencv2/video.hpp"
#include "opencv2/core/private.cuda.hpp"
#if defined HAVE_CUDA
#include "opencv2/core/cuda/vec_traits.hpp"
#endif
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDALEGACY

@ -148,8 +148,8 @@ namespace grid_minmaxloc_detail
block = dim3(Policy::block_size_x, Policy::block_size_y);
grid = dim3(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));
grid.x = ::min(grid.x, block.x);
grid.y = ::min(grid.y, block.y);
grid.x = std::min(grid.x, block.x);
grid.y = std::min(grid.y, block.y);
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>

@ -75,7 +75,7 @@ void SegEngineGPU::cvtImgSpace(Ptr<UChar4Image> inimg, Ptr<Float4Image> outimg)
dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
dim3 gridSize = getGridSize(img_size, blockSize);
cvtImgSpaceDevice << <gridSize, blockSize >> >(inimg_ptr, img_size, outimg_ptr);
cvtImgSpaceDevice <<<gridSize, blockSize >>>(inimg_ptr, img_size, outimg_ptr);
}
void SegEngineGPU::initClusterCenters()
@ -85,7 +85,7 @@ void SegEngineGPU::initClusterCenters()
dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
dim3 gridSize = getGridSize(map_size, blockSize);
initClusterCentersDevice << <gridSize, blockSize >> >
initClusterCentersDevice <<<gridSize, blockSize >>>
(img_ptr, map_size, img_size, spixel_size, spixel_list);
}
@ -98,7 +98,7 @@ void SegEngineGPU::findCenterAssociation()
dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
dim3 gridSize = getGridSize(img_size, blockSize);
findCenterAssociationDevice << <gridSize, blockSize >> >
findCenterAssociationDevice <<<gridSize, blockSize >>>
(img_ptr, spixel_list, map_size, img_size,
spixel_size, slic_settings.coh_weight,
max_xy_dist, max_color_dist, idx_ptr);
@ -116,13 +116,13 @@ void SegEngineGPU::updateClusterCenter()
dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
dim3 gridSize(map_size.x, map_size.y, no_grid_per_center);
updateClusterCenterDevice << <gridSize, blockSize >> >
updateClusterCenterDevice <<<gridSize, blockSize >>>
(img_ptr, idx_ptr, map_size, img_size,
spixel_size, no_blocks_per_line, accum_map_ptr);
dim3 gridSize2(map_size.x, map_size.y);
finalizeReductionResultDevice << <gridSize2, blockSize >> >
finalizeReductionResultDevice <<<gridSize2, blockSize >>>
(accum_map_ptr, map_size, no_grid_per_center, spixel_list_ptr);
}
@ -134,13 +134,13 @@ void SegEngineGPU::enforceConnectivity()
dim3 blockSize(HFS_BLOCK_DIM, HFS_BLOCK_DIM);
dim3 gridSize = getGridSize(img_size, blockSize);
enforceConnectivityDevice << <gridSize, blockSize >> >
enforceConnectivityDevice <<<gridSize, blockSize >>>
(idx_ptr, img_size, tmp_idx_ptr);
enforceConnectivityDevice << <gridSize, blockSize >> >
enforceConnectivityDevice <<<gridSize, blockSize >>>
(tmp_idx_ptr, img_size, idx_ptr);
enforceConnectivityDevice1_2 << <gridSize, blockSize >> >
enforceConnectivityDevice1_2 <<<gridSize, blockSize >>>
(idx_ptr, img_size, tmp_idx_ptr);
enforceConnectivityDevice1_2 << <gridSize, blockSize >> >
enforceConnectivityDevice1_2 <<<gridSize, blockSize >>>
(tmp_idx_ptr, img_size, idx_ptr);
}

@ -194,7 +194,7 @@ void Magnitude::derrivativeXYGpu()
dim3 gridSize((int)ceil((float)img_size.x / (float)blockSize.x),
(int)ceil((float)img_size.y / (float)blockSize.y));
derrivativeXYDevice << <gridSize, blockSize >> >
derrivativeXYDevice <<<gridSize, blockSize >>>
(gray_ptr, dx_ptr, dy_ptr, mag_ptr, img_size);
}
@ -209,7 +209,7 @@ void Magnitude::nonMaxSuppGpu()
dim3 gridSize((int)ceil((float)img_size.x / (float)blockSize.x),
(int)ceil((float)img_size.y / (float)blockSize.y));
nonMaxSuppDevice << <gridSize, blockSize >> >
nonMaxSuppDevice <<<gridSize, blockSize >>>
(nms_ptr, dx_ptr, dy_ptr, mag_ptr, img_size);
}

Loading…
Cancel
Save