refactor CUDA part of photo module

pull/3605/head
Vladislav Vinogradov 10 years ago
parent df57d038b8
commit f48befc6f0
  1. 114
      modules/photo/include/opencv2/photo/cuda.hpp
  2. 8
      modules/photo/perf/perf_cuda.cpp
  3. 58
      modules/photo/src/denoising.cuda.cpp
  4. 5
      modules/photo/test/test_denoising.cuda.cpp

@ -64,64 +64,66 @@ BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supporte
@sa
fastNlMeansDenoising
*/
CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst,
float h,
int search_window = 21,
int block_size = 7,
int borderMode = BORDER_DEFAULT,
Stream& stream = Stream::Null());
/** @brief Perform image denoising using Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
optimizations. Noise is expected to be Gaussian white noise.
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
@param dst Output image with the same size and type as src .
@param h Parameter regulating filter strength. Big h value perfectly removes noise but also
removes image details, smaller h value preserves details but also preserves some noise
@param search_window Size in pixels of the window that is used to compute the weighted average for
a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater
the denoising time. Recommended value is 21 pixels.
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value 7 pixels
@param stream Stream for the asynchronous invocations.
This function is expected to be applied to grayscale images. For colored images look at
fastNlMeansDenoisingColored.
/** @brief The class implements fast approximate Non Local Means Denoising algorithm.
@sa
fastNlMeansDenoising
*/
CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst,
float h,
int search_window = 21,
int block_size = 7,
Stream& stream = Stream::Null());
/** @brief Modification of fastNlMeansDenoising function for colored images
@param src Input 8-bit 3-channel image.
@param dst Output image with the same size and type as src .
@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
also removes image details, smaller h value preserves details but also preserves some noise
@param photo_render The same as h_luminance but for color components. For most images a value of 10 will be
enough to remove colored noise without distorting colors.
@param search_window Size in pixels of the window that is used to compute the weighted average for
a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater
the denoising time. Recommended value is 21 pixels.
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value 7 pixels
@param stream Stream for the asynchronous invocations.
The function converts the image to the CIELAB colorspace and then separately denoises the L and AB
components with the given h parameters using the fastNlMeansDenoising function.
@sa
fastNlMeansDenoisingColored
*/
// Fast approximate Non-Local Means denoising for GpuMat images. The object keeps the
// intermediate GpuMat buffers used by its two methods as private members.
class CV_EXPORTS FastNonLocalMeansDenoising
{
public:
/** @brief Performs image denoising using the Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
optimizations. Noise is expected to be Gaussian white noise.
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
@param dst Output image with the same size and type as src .
@param h Parameter regulating filter strength. A big h value perfectly removes noise but also
removes image details; a smaller h value preserves details but also preserves some noise.
@param search_window Size in pixels of the window that is used to compute the weighted average for
a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater
the denoising time. Recommended value is 21 pixels.
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value is 7 pixels.
@param s Stream for the asynchronous invocations.
This function is expected to be applied to grayscale images. For colored images look at
FastNonLocalMeansDenoising::labMethod.
@sa
fastNlMeansDenoising
*/
void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
/** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images
@param src Input 8-bit 3-channel image.
@param dst Output image with the same size and type as src .
@param h_luminance Parameter regulating filter strength. A big h value perfectly removes noise but
also removes image details; a smaller h value preserves details but also preserves some noise.
@param photo_render The same as h_luminance but for color components. For most images a value of 10
will be enough to remove colored noise without distorting colors.
@param search_window Size in pixels of the window that is used to compute the weighted average for
a given pixel. Should be odd. Affects performance linearly: the greater the search_window, the greater
the denoising time. Recommended value is 21 pixels.
@param block_size Size in pixels of the template patch that is used to compute weights. Should be
odd. Recommended value is 7 pixels.
@param s Stream for the asynchronous invocations.
The function converts the image to the CIELAB colorspace and then separately denoises the L and AB
components with the given h parameters using the FastNonLocalMeansDenoising::simpleMethod function.
@sa
fastNlMeansDenoisingColored
*/
void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
private:
// Intermediate storage for simpleMethod: the working buffer passed to the denoising routine
// and the backing store for the border-extended copy of the source.
GpuMat buffer, extended_src_buffer;
// Intermediate storage for labMethod: the Lab-converted image and its split L and AB planes.
GpuMat lab, l, ab;
};
CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
float h_luminance, float photo_render,
int search_window = 21,
int block_size = 7,
Stream& stream = Stream::Null());
//! @} photo

@ -126,12 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, CUDA_FastNonLocalMeans,
if (PERF_RUN_CUDA())
{
cv::cuda::FastNonLocalMeansDenoising fnlmd;
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst;
TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
TEST_CYCLE() cv::cuda::fastNlMeansDenoising(d_src, dst, h, search_widow_size, block_size);
CUDA_SANITY_CHECK(dst);
}
@ -171,12 +169,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, CUDA_FastNonLocalMeansColored,
if (PERF_RUN_CUDA())
{
cv::cuda::FastNonLocalMeansDenoising fnlmd;
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst;
TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
TEST_CYCLE() cv::cuda::fastNlMeansDenoisingColored(d_src, dst, h, h, search_widow_size, block_size);
CUDA_SANITY_CHECK(dst);
}

@ -60,9 +60,9 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC)
void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::nonLocalMeans(InputArray, OutputArray, float, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::fastNlMeansDenoising(InputArray, OutputArray, float, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::fastNlMeansDenoisingColored(InputArray, OutputArray, float, float, int, int, Stream&) { throw_no_cuda(); }
#else
@ -78,13 +78,15 @@ namespace cv { namespace cuda { namespace device
}
}}}
void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream)
{
using cv::cuda::device::imgproc::nlm_bruteforce_gpu;
typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
const GpuMat src = _src.getGpuMat();
CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);
const func_t func = funcs[src.channels() - 1];
@ -93,8 +95,10 @@ void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search
int b = borderMode;
CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);
dst.create(src.size(), src.type());
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s));
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream));
}
namespace cv { namespace cuda { namespace device
@ -112,47 +116,55 @@ namespace cv { namespace cuda { namespace device
}
}}}
void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
{
const GpuMat src = _src.getGpuMat();
CV_Assert(src.depth() == CV_8U && src.channels() < 4);
int border_size = search_window/2 + block_window/2;
Size esize = src.size() + Size(border_size, border_size) * 2;
cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
BufferPool pool(stream);
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
GpuMat extended_src = pool.getBuffer(esize, src.type());
cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
int bcols, brows;
device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
buffer.create(brows, bcols, CV_32S);
GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);
using namespace cv::cuda::device::imgproc;
typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
dst.create(src.size(), src.type());
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
}
void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream)
{
const GpuMat src = _src.getGpuMat();
CV_Assert(src.type() == CV_8UC3);
lab.create(src.size(), src.type());
cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
BufferPool pool(stream);
GpuMat lab = pool.getBuffer(src.size(), src.type());
cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream);
l.create(src.size(), CV_8U);
ab.create(src.size(), CV_8UC2);
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));
GpuMat l = pool.getBuffer(src.size(), CV_8U);
GpuMat ab = pool.getBuffer(src.size(), CV_8UC2);
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream));
simpleMethod(l, l, h_luminance, search_window, block_window, s);
simpleMethod(ab, ab, h_color, search_window, block_window, s);
fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream);
fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream);
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s));
cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream));
cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream);
}
#endif

@ -99,10 +99,9 @@ TEST(CUDA_FastNonLocalMeans, Regression)
cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
GpuMat dbgr, dgray;
cv::cuda::FastNonLocalMeansDenoising fnlmd;
fnlmd.simpleMethod(GpuMat(gray), dgray, 20);
fnlmd.labMethod(GpuMat(bgr), dbgr, 20, 10);
cv::cuda::fastNlMeansDenoising(GpuMat(gray), dgray, 20);
cv::cuda::fastNlMeansDenoisingColored(GpuMat(bgr), dbgr, 20, 10);
#if 0
dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));

Loading…
Cancel
Save