Significantly improved the parallel non-local means implementation by passing a granularity parameter to the parallel_for_ loop. Because the algorithm relies on sliding sums, it is essential that each thread has enough work to do; otherwise the algorithm has a higher theoretical complexity, and thus there is no speedup compared to single-threaded code (at best).

pull/4018/head
Vadim Pisarevsky 10 years ago
parent feb5b6aa93
commit b37aaa8303
  1. 44
      modules/photo/src/denoising.cpp
  2. 11
      modules/photo/test/test_denoising.cpp

@ -50,42 +50,50 @@ static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<f
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize)
{ {
int hn = (int)h.size(); int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 17));
switch (CV_MAT_CN(src.type())) { switch (CV_MAT_CN(src.type())) {
case 1: case 1:
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>( FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case 2: case 2:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>( FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>( FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case 3: case 3:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>( FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>( FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case 4: case 4:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>( FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>( FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
src, dst, templateWindowSize, searchWindowSize, &h[0])); src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
default: default:
CV_Error(Error::StsBadArg, CV_Error(Error::StsBadArg,
@ -237,6 +245,7 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize)
{ {
int hn = (int)h.size(); int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));
switch (srcImgs[0].type()) switch (srcImgs[0].type())
{ {
@ -244,43 +253,50 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>( FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case CV_8UC2: case CV_8UC2:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case CV_8UC3: case CV_8UC3:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
case CV_8UC4: case CV_8UC4:
if (hn == 1) if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else else
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>( FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0])); dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break; break;
default: default:
CV_Error(Error::StsBadArg, CV_Error(Error::StsBadArg,

@ -156,3 +156,14 @@ TEST(Photo_White, issue_2646)
ASSERT_EQ(0, nonWhitePixelsCount); ASSERT_EQ(0, nonWhitePixelsCount);
} }
// Timing smoke test for fastNlMeansDenoising().
//
// Loads "shared/5MP.png" from the test data directory, runs the denoiser once
// and prints the elapsed wall-clock time in milliseconds. The denoised output
// itself is not validated here; the test only fails if the input image cannot
// be loaded or if the call itself raises an error.
TEST(Photo_Denoising, speed)
{
    string imgname = string(cvtest::TS::ptr()->get_data_path()) + "shared/5MP.png";
    // Flag 0 requests a single-channel (grayscale) load per the imread docs.
    Mat src = imread(imgname, 0), dst;

    // imread() signals failure by returning an empty Mat rather than throwing.
    // Without this guard a missing test image would go unnoticed and the
    // printed time would describe a run on empty input.
    ASSERT_FALSE(src.empty()) << "Could not load test image: " << imgname;

    double t = (double)getTickCount();
    // Arguments after dst: filter strength h, templateWindowSize, searchWindowSize.
    fastNlMeansDenoising(src, dst, 5, 7, 21);
    t = (double)getTickCount() - t;

    // Convert ticks to milliseconds: ticks * 1000 / (ticks per second).
    printf("execution time: %gms\n", t*1000./getTickFrequency());
}

Loading…
Cancel
Save