From c56bdbc1c5768dfb76eccb531268b20f5ebe262f Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 9 Apr 2013 14:18:18 +0400 Subject: [PATCH] moved integral to gpuarithm module --- modules/gpu/include/opencv2/gpu.hpp | 12 -- modules/gpu/perf/perf_imgproc.cpp | 57 --------- modules/gpu/src/imgproc.cpp | 108 ---------------- modules/gpu/test/test_imgproc.cpp | 37 ------ modules/gpuarithm/CMakeLists.txt | 2 +- .../gpuarithm/include/opencv2/gpuarithm.hpp | 12 ++ modules/gpuarithm/perf/perf_core.cpp | 61 ++++++++- modules/gpuarithm/src/arithm.cpp | 118 ++++++++++++++++++ .../src/cuda/integral.cu} | 0 modules/gpuarithm/src/precomp.hpp | 7 ++ modules/gpuarithm/test/test_core.cpp | 41 +++++- 11 files changed, 236 insertions(+), 219 deletions(-) rename modules/{gpu/src/cuda/integral_image.cu => gpuarithm/src/cuda/integral.cu} (100%) diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index 642c327a35..ead3ab3332 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -168,18 +168,6 @@ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); -//! computes the integral image -//! sum will have CV_32S type, but will contain unsigned int values -//! supports only CV_8UC1 source type -CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null()); -//! buffered version -CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null()); - -//! computes squared integral image -//! result matrix will have 64F type, but will contain 64U values -//! supports source images of 8UC1 type only -CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null()); - //! computes Harris cornerness criteria at each image pixel CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101); CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101); diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index 02a1e6cd46..d26bb844bd 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -363,63 +363,6 @@ PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, } } -////////////////////////////////////////////////////////////////////// -// Integral - -PERF_TEST_P(Sz, ImgProc_Integral, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::integral(src, dst); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// IntegralSqr - -PERF_TEST_P(Sz, ImgProc_IntegralSqr, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - ////////////////////////////////////////////////////////////////////// // HistEvenC1 diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 54a6937006..3a967fb35a 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -55,9 +55,6 @@ void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_no_cuda(); } void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); } void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); } @@ -412,111 +409,6 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream)); } -//////////////////////////////////////////////////////////////////////// -// integral - -void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s) -{ - GpuMat buffer; - integralBuffered(src, sum, buffer, s); -} - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz integral, cudaStream_t stream); - } -}}} - -void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s) -{ - CV_Assert(src.type() == CV_8UC1); - - cudaStream_t stream = StreamAccessor::getStream(s); - - cv::Size whole; - cv::Point offset; - - src.locateROI(whole, offset); - - if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048 - && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast(src.step) - offset.x)) - { - ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer); - - cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream); - - sum.create(src.rows + 1, src.cols + 1, CV_32SC1); - if (s) - s.enqueueMemSet(sum, Scalar::all(0)); - else - sum.setTo(Scalar::all(0)); - - GpuMat inner = sum(Rect(1, 1, src.cols, src.rows)); - GpuMat res = buffer(Rect(0, 0, src.cols, src.rows)); - - if (s) - s.enqueueCopy(res, inner); - else - res.copyTo(inner); - } - else - { - sum.create(src.rows + 1, src.cols + 1, CV_32SC1); - - NcvSize32u roiSize; - roiSize.width = src.cols; - roiSize.height = src.rows; - - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); - - Ncv32u bufSize; - ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); - ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer); - - - NppStStreamHandler h(stream); - - ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast(src.ptr()), static_cast(src.step), - sum.ptr(), static_cast(sum.step), roiSize, buffer.ptr(), bufSize, prop) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -////////////////////////////////////////////////////////////////////////////// -// sqrIntegral - -void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s) -{ - CV_Assert(src.type() == CV_8U); - - NcvSize32u roiSize; - roiSize.width = src.cols; - roiSize.height = src.rows; - - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); - - Ncv32u bufSize; - ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop)); - GpuMat buf(1, bufSize, CV_8U); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStStreamHandler h(stream); - - sqsum.create(src.rows + 1, src.cols + 1, CV_64F); - ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast(src.ptr(0)), static_cast(src.step), - sqsum.ptr(0), static_cast(sqsum.step), roiSize, buf.ptr(0), bufSize, prop)); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - //////////////////////////////////////////////////////////////////////// // Histogram diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index a38f27b740..ffc413ee0a 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -46,43 +46,6 @@ using namespace cvtest; -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// Integral - -PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - bool useRoi; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - useRoi = GET_PARAM(2); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Integral, Accuracy) -{ - cv::Mat src = randomMat(size, CV_8UC1); - - cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi); - cv::gpu::integral(loadMat(src, useRoi), dst); - - cv::Mat dst_gold; - cv::integral(src, dst_gold, CV_32S); - - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - WHOLE_SUBMAT)); - /////////////////////////////////////////////////////////////////////////////////////////////////////// // HistEven diff --git a/modules/gpuarithm/CMakeLists.txt b/modules/gpuarithm/CMakeLists.txt index 04a6b2cc20..75cab4b31c 100644 --- a/modules/gpuarithm/CMakeLists.txt +++ b/modules/gpuarithm/CMakeLists.txt @@ -6,7 +6,7 @@ set(the_description "GPU-accelerated Operations on Matrices") ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations) -ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_imgproc) +ocv_define_module(gpuarithm opencv_core OPTIONAL opencv_gpunvidia opencv_imgproc) if(HAVE_CUBLAS) CUDA_ADD_CUBLAS_TO_TARGET(${the_module}) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 03458ea045..8829e43a73 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -283,6 +283,18 @@ CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, co CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null()); +//! computes the integral image +//! sum will have CV_32S type, but will contain unsigned int values +//! supports only CV_8UC1 source type +CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null()); +//! buffered version +CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null()); + +//! computes squared integral image +//! result matrix will have 64F type, but will contain 64U values +//! supports source images of 8UC1 type only +CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null()); + }} // namespace cv { namespace gpu { #endif /* __OPENCV_GPUARITHM_HPP__ */ diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp index a9a6e360b5..603d2448e8 100644 --- a/modules/gpuarithm/perf/perf_core.cpp +++ b/modules/gpuarithm/perf/perf_core.cpp @@ -2156,11 +2156,11 @@ PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, } } +#ifdef HAVE_OPENCV_IMGPROC + ////////////////////////////////////////////////////////////////////// // CopyMakeBorder -#ifdef HAVE_OPENCV_IMGPROC - DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, @@ -2198,4 +2198,61 @@ PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, } } +////////////////////////////////////////////////////////////////////// +// Integral + +PERF_TEST_P(Sz, ImgProc_Integral, + GPU_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_GPU()) + { + const cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat dst; + cv::gpu::GpuMat d_buf; + + TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf); + + GPU_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::integral(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// IntegralSqr + +PERF_TEST_P(Sz, ImgProc_IntegralSqr, + GPU_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_GPU()) + { + const cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat dst; + + TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst); + + GPU_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + #endif diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index 40242876d4..baf598d969 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -61,6 +61,9 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); } void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); } +void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } #else /* !defined (HAVE_CUDA) */ @@ -718,4 +721,119 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom } } +//////////////////////////////////////////////////////////////////////// +// integral + +void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s) +{ + GpuMat buffer; + integralBuffered(src, sum, buffer, s); +} + +namespace cv { namespace gpu { namespace cudev +{ + namespace imgproc + { + void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz integral, cudaStream_t stream); + } +}}} + +void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s) +{ + CV_Assert(src.type() == CV_8UC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + + cv::Size whole; + cv::Point offset; + + src.locateROI(whole, offset); + + if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048 + && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast(src.step) - offset.x)) + { + ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer); + + cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream); + + sum.create(src.rows + 1, src.cols + 1, CV_32SC1); + if (s) + s.enqueueMemSet(sum, Scalar::all(0)); + else + sum.setTo(Scalar::all(0)); + + GpuMat inner = sum(Rect(1, 1, src.cols, src.rows)); + GpuMat res = buffer(Rect(0, 0, src.cols, src.rows)); + + if (s) + s.enqueueCopy(res, inner); + else + res.copyTo(inner); + } + else + { +#ifndef HAVE_OPENCV_GPUNVIDIA + throw_no_cuda(); +#else + sum.create(src.rows + 1, src.cols + 1, CV_32SC1); + + NcvSize32u roiSize; + roiSize.width = src.cols; + roiSize.height = src.rows; + + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); + + Ncv32u bufSize; + ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); + ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer); + + NppStStreamHandler h(stream); + + ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast(src.ptr()), static_cast(src.step), + sum.ptr(), static_cast(sum.step), roiSize, buffer.ptr(), bufSize, prop) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +#endif + } +} + +////////////////////////////////////////////////////////////////////////////// +// sqrIntegral + +void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s) +{ +#ifndef HAVE_OPENCV_GPUNVIDIA + (void) src; + (void) sqsum; + (void) s; + throw_no_cuda(); +#else + CV_Assert(src.type() == CV_8U); + + NcvSize32u roiSize; + roiSize.width = src.cols; + roiSize.height = src.rows; + + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); + + Ncv32u bufSize; + ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop)); + GpuMat buf(1, bufSize, CV_8U); + + cudaStream_t stream = StreamAccessor::getStream(s); + + NppStStreamHandler h(stream); + + sqsum.create(src.rows + 1, src.cols + 1, CV_64F); + ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast(src.ptr(0)), static_cast(src.step), + sqsum.ptr(0), static_cast(sqsum.step), roiSize, buf.ptr(0), bufSize, prop)); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +#endif +} + #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/cuda/integral_image.cu b/modules/gpuarithm/src/cuda/integral.cu similarity index 100% rename from modules/gpu/src/cuda/integral_image.cu rename to modules/gpuarithm/src/cuda/integral.cu diff --git a/modules/gpuarithm/src/precomp.hpp b/modules/gpuarithm/src/precomp.hpp index 7d36adb465..6e21684aa4 100644 --- a/modules/gpuarithm/src/precomp.hpp +++ b/modules/gpuarithm/src/precomp.hpp @@ -51,6 +51,13 @@ #include "opencv2/core/gpu_private.hpp" +#include "opencv2/opencv_modules.hpp" + +#ifdef HAVE_OPENCV_GPUNVIDIA +# include "opencv2/gpunvidia.hpp" +# include "opencv2/gpunvidia/private.hpp" +#endif + #ifdef HAVE_CUBLAS #include #endif diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp index 613d7122ac..36c155480f 100644 --- a/modules/gpuarithm/test/test_core.cpp +++ b/modules/gpuarithm/test/test_core.cpp @@ -3607,11 +3607,11 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Normalize, testing::Combine( testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF), NormCode(cv::NORM_MINMAX)), WHOLE_SUBMAT)); +#ifdef HAVE_OPENCV_IMGPROC + ////////////////////////////////////////////////////////////////////////////// // CopyMakeBorder -#ifdef HAVE_OPENCV_IMGPROC - namespace { IMPLEMENT_PARAM_CLASS(Border, int) @@ -3669,6 +3669,43 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( ALL_BORDER_TYPES, WHOLE_SUBMAT)); +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// Integral + +PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + useRoi = GET_PARAM(2); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Integral, Accuracy) +{ + cv::Mat src = randomMat(size, CV_8UC1); + + cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi); + cv::gpu::integral(loadMat(src, useRoi), dst); + + cv::Mat dst_gold; + cv::integral(src, dst_gold, CV_32S); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Integral, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + WHOLE_SUBMAT)); + #endif #endif // HAVE_CUDA