diff --git a/modules/photo/include/opencv2/photo/cuda.hpp b/modules/photo/include/opencv2/photo/cuda.hpp index 4b69afa7be..474e6f8317 100644 --- a/modules/photo/include/opencv2/photo/cuda.hpp +++ b/modules/photo/include/opencv2/photo/cuda.hpp @@ -64,64 +64,66 @@ BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supporte @sa fastNlMeansDenoising */ -CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null()); +CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst, + float h, + int search_window = 21, + int block_size = 7, + int borderMode = BORDER_DEFAULT, + Stream& stream = Stream::Null()); + +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise is expected to be a gaussian white noise + +@param src Input 8-bit 1-channel, 2-channel or 3-channel image. +@param dst Output image with the same size and type as src . +@param h Parameter regulating filter strength. Big h value perfectly removes noise but also +removes image details, smaller h value preserves details but also preserves some noise +@param search_window Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater search_window - greater +denoising time. Recommended value 21 pixels +@param block_size Size in pixels of the template patch that is used to compute weights. Should be +odd. Recommended value 7 pixels +@param stream Stream for the asynchronous invocations. + +This function is expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. -/** @brief The class implements fast approximate Non Local Means Denoising algorithm. 
+@sa + fastNlMeansDenoising + */ +CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst, + float h, + int search_window = 21, + int block_size = 7, + Stream& stream = Stream::Null()); + +/** @brief Modification of fastNlMeansDenoising function for colored images + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src . +@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but +also removes image details, smaller h value preserves details but also preserves some noise +@param photo_render The same as h_luminance but for color components. For most images a value of 10 will be +enough to remove colored noise and not distort colors +@param search_window Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater search_window - greater +denoising time. Recommended value 21 pixels +@param block_size Size in pixels of the template patch that is used to compute weights. Should be +odd. Recommended value 7 pixels +@param stream Stream for the asynchronous invocations. + +The function converts image to CIELAB colorspace and then separately denoises L and AB components +with given h parameters using the fastNlMeansDenoising function. + +@sa + fastNlMeansDenoisingColored */ -class CV_EXPORTS FastNonLocalMeansDenoising -{ -public: - /** @brief Perform image denoising using Non-local Means Denoising algorithm - with several computational - optimizations. Noise expected to be a gaussian white noise - - @param src Input 8-bit 1-channel, 2-channel or 3-channel image. - @param dst Output image with the same size and type as src . - @param h Parameter regulating filter strength. 
Big h value perfectly removes noise but also - removes image details, smaller h value preserves details but also preserves some noise - @param search_window Size in pixels of the window that is used to compute weighted average for - given pixel. Should be odd. Affect performance linearly: greater search_window - greater - denoising time. Recommended value 21 pixels - @param block_size Size in pixels of the template patch that is used to compute weights. Should be - odd. Recommended value 7 pixels - @param s Stream for the asynchronous invocations. - - This function expected to be applied to grayscale images. For colored images look at - FastNonLocalMeansDenoising::labMethod. - - @sa - fastNlMeansDenoising - */ - void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); - - /** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images - - @param src Input 8-bit 3-channel image. - @param dst Output image with the same size and type as src . - @param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but - also removes image details, smaller h value preserves details but also preserves some noise - @param photo_render float The same as h but for color components. For most images value equals 10 will be - enought to remove colored noise and do not distort colors - @param search_window Size in pixels of the window that is used to compute weighted average for - given pixel. Should be odd. Affect performance linearly: greater search_window - greater - denoising time. Recommended value 21 pixels - @param block_size Size in pixels of the template patch that is used to compute weights. Should be - odd. Recommended value 7 pixels - @param s Stream for the asynchronous invocations. 
- - The function converts image to CIELAB colorspace and then separately denoise L and AB components - with given h parameters using FastNonLocalMeansDenoising::simpleMethod function. - - @sa - fastNlMeansDenoisingColored - */ - void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); - -private: - - GpuMat buffer, extended_src_buffer; - GpuMat lab, l, ab; -}; +CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst, + float h_luminance, float photo_render, + int search_window = 21, + int block_size = 7, + Stream& stream = Stream::Null()); //! @} photo diff --git a/modules/photo/perf/perf_cuda.cpp b/modules/photo/perf/perf_cuda.cpp index 318ec17dfd..4496599d1b 100644 --- a/modules/photo/perf/perf_cuda.cpp +++ b/modules/photo/perf/perf_cuda.cpp @@ -126,12 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, CUDA_FastNonLocalMeans, if (PERF_RUN_CUDA()) { - cv::cuda::FastNonLocalMeansDenoising fnlmd; - const cv::cuda::GpuMat d_src(src); cv::cuda::GpuMat dst; - TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size); + TEST_CYCLE() cv::cuda::fastNlMeansDenoising(d_src, dst, h, search_widow_size, block_size); CUDA_SANITY_CHECK(dst); } @@ -171,12 +169,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, CUDA_FastNonLocalMeansColored, if (PERF_RUN_CUDA()) { - cv::cuda::FastNonLocalMeansDenoising fnlmd; - const cv::cuda::GpuMat d_src(src); cv::cuda::GpuMat dst; - TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size); + TEST_CYCLE() cv::cuda::fastNlMeansDenoisingColored(d_src, dst, h, h, search_widow_size, block_size); CUDA_SANITY_CHECK(dst); } diff --git a/modules/photo/src/denoising.cuda.cpp b/modules/photo/src/denoising.cuda.cpp index 76b870fe58..7ea37f6951 100644 --- a/modules/photo/src/denoising.cuda.cpp +++ b/modules/photo/src/denoising.cuda.cpp @@ -60,9 +60,9 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || 
!defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC) -void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); } -void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); } -void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); } +void cv::cuda::nonLocalMeans(InputArray, OutputArray, float, int, int, int, Stream&) { throw_no_cuda(); } +void cv::cuda::fastNlMeansDenoising(InputArray, OutputArray, float, int, int, Stream&) { throw_no_cuda(); } +void cv::cuda::fastNlMeansDenoisingColored(InputArray, OutputArray, float, float, int, int, Stream&) { throw_no_cuda(); } #else @@ -78,13 +78,15 @@ namespace cv { namespace cuda { namespace device } }}} -void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s) +void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream) { using cv::cuda::device::imgproc::nlm_bruteforce_gpu; typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream); static const func_t funcs[4] = { nlm_bruteforce_gpu, nlm_bruteforce_gpu, nlm_bruteforce_gpu, 0/*nlm_bruteforce_gpu,*/ }; + const GpuMat src = _src.getGpuMat(); + CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3); const func_t func = funcs[src.channels() - 1]; @@ -93,8 +95,10 @@ void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search int b = borderMode; CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP); - dst.create(src.size(), src.type()); - func(src, dst, search_window/2, block_window/2, h, borderMode, 
StreamAccessor::getStream(s)); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream)); } namespace cv { namespace cuda { namespace device @@ -112,47 +116,55 @@ namespace cv { namespace cuda { namespace device } }}} -void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s) +void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream) { + const GpuMat src = _src.getGpuMat(); + CV_Assert(src.depth() == CV_8U && src.channels() < 4); int border_size = search_window/2 + block_window/2; Size esize = src.size() + Size(border_size, border_size) * 2; - cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer); - GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step); + BufferPool pool(stream); - cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s); + GpuMat extended_src = pool.getBuffer(esize, src.type()); + cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream); GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size())); int bcols, brows; device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows); - buffer.create(brows, bcols, CV_32S); + GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S); using namespace cv::cuda::device::imgproc; typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t); static const nlm_fast_t funcs[] = { nlm_fast_gpu, nlm_fast_gpu, nlm_fast_gpu, 0}; - dst.create(src.size(), src.type()); - funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, 
StreamAccessor::getStream(s)); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream)); } -void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s) +void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream) { + const GpuMat src = _src.getGpuMat(); + CV_Assert(src.type() == CV_8UC3); - lab.create(src.size(), src.type()); - cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s); + BufferPool pool(stream); + + GpuMat lab = pool.getBuffer(src.size(), src.type()); + cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream); - l.create(src.size(), CV_8U); - ab.create(src.size(), CV_8UC2); - device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s)); + GpuMat l = pool.getBuffer(src.size(), CV_8U); + GpuMat ab = pool.getBuffer(src.size(), CV_8UC2); + device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream)); - simpleMethod(l, l, h_luminance, search_window, block_window, s); - simpleMethod(ab, ab, h_color, search_window, block_window, s); + fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream); + fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream); - device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s)); - cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s); + device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream)); + cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream); } #endif diff --git a/modules/photo/test/test_denoising.cuda.cpp b/modules/photo/test/test_denoising.cuda.cpp index dce20b9f51..209bac3328 100644 --- a/modules/photo/test/test_denoising.cuda.cpp +++ 
b/modules/photo/test/test_denoising.cuda.cpp @@ -99,10 +99,9 @@ TEST(CUDA_FastNonLocalMeans, Regression) cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY); GpuMat dbgr, dgray; - cv::cuda::FastNonLocalMeansDenoising fnlmd; - fnlmd.simpleMethod(GpuMat(gray), dgray, 20); - fnlmd.labMethod(GpuMat(bgr), dbgr, 20, 10); + cv::cuda::fastNlMeansDenoising(GpuMat(gray), dgray, 20); + cv::cuda::fastNlMeansDenoisingColored(GpuMat(bgr), dbgr, 20, 10); #if 0 dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));