significantly improved parallel non-local means by using granularity parameter in parallel_for_ loop. Because the algorithm deals with sliding sums, it's essential that each thread has enough work to do, otherwise the algorithm gets higher theoretical complexity and thus there is no speedup comparing to 1-thread code (at best).

pull/4018/head
Vadim Pisarevsky 10 years ago
parent feb5b6aa93
commit b37aaa8303
  1. 44
      modules/photo/src/denoising.cpp
  2. 11
      modules/photo/test/test_denoising.cpp

@ -50,42 +50,50 @@ static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<f
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 17));
switch (CV_MAT_CN(src.type())) {
case 1:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 2:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 3:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 4:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
default:
CV_Error(Error::StsBadArg,
@ -237,6 +245,7 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));
switch (srcImgs[0].type())
{
@ -244,43 +253,50 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC2:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC3:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC4:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
default:
CV_Error(Error::StsBadArg,

@ -156,3 +156,14 @@ TEST(Photo_White, issue_2646)
ASSERT_EQ(0, nonWhitePixelsCount);
}
TEST(Photo_Denoising, speed)
{
string imgname = string(cvtest::TS::ptr()->get_data_path()) + "shared/5MP.png";
Mat src = imread(imgname, 0), dst;
double t = (double)getTickCount();
fastNlMeansDenoising(src, dst, 5, 7, 21);
t = (double)getTickCount() - t;
printf("execution time: %gms\n", t*1000./getTickFrequency());
}

Loading…
Cancel
Save