From d588c717da1ad2b77e03b058a281da3c00ba0327 Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Fri, 13 Feb 2015 00:11:30 +0100 Subject: [PATCH] Using WEIGHT_THRESHOLD to limit table size. Still problematic with 16-bit and big h-values. --- .../src/fast_nlmeans_denoising_invoker.hpp | 30 +++++++++---------- .../fast_nlmeans_multi_denoising_invoker.hpp | 29 +++++++++--------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 27a016ae98..c9689cabd7 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -123,31 +123,28 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 24 bits to avoid unreasonable table sizes - // TODO: uses lots of memory and loses precision wtih 16-bit images ???? - const size_t TABLE_MAX_BITS = 24; CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX) int template_window_size_sq = template_window_size_ * template_window_size_; - almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) + - std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; + almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; + const double WEIGHT_THRESHOLD = 0.001; + const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); - almost_dist2weight_.resize(almost_max_dist); - - const double WEIGHT_THRESHOLD = 0.001; - for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + int almost_max_dist = 0; + while (true) { - double dist = almost_dist * almost_dist2actual_dist_multiplier; + double dist = almost_max_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; + if (almost_max_dist >= almost_dist2weight_.size()) + almost_dist2weight_.resize(almost_max_dist + ALLOC_CHUNK); - almost_dist2weight_[almost_dist] = weight; + almost_dist2weight_[almost_max_dist++] = weight; } + almost_dist2weight_.resize(almost_max_dist); CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end @@ -161,6 +158,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co int row_from = range.start; int row_to = range.end - 1; + int almost_max_dist = almost_dist2weight_.size(); + // sums of cols anf rows for current pixel p Array2d dist_sums(search_window_size_, search_window_size_); @@ -244,7 +243,8 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co for (int x = 0; x < search_window_size_; x++) { int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); - IT weight = almost_dist2weight_[almostAvgDist]; + IT weight = + almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index c90249b82e..b4bfc0c6c1 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -131,35 +131,31 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( // precalc weight for every possible l2 dist between blocks // additional optimization of precalced weights to replace division(averaging) by binary shift - // squared distances are truncated to 24 bits to avoid unreasonable table sizes - // TODO: uses lots of memory and loses precision wtih 16-bit images ???? - const size_t TABLE_MAX_BITS = 24; int template_window_size_sq = template_window_size_ * template_window_size_; almost_template_window_size_sq_bin_shift = 0; while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq) almost_template_window_size_sq_bin_shift++; - almost_template_window_size_sq_bin_shift += - std::max(2*pixelInfo::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; + const double WEIGHT_THRESHOLD = 0.001; + const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); - almost_dist2weight.resize(almost_max_dist); - - const double WEIGHT_THRESHOLD = 0.001; - for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) + int almost_max_dist = 0; + while (true) { - double dist = almost_dist * almost_dist2actual_dist_multiplier; + double dist = almost_max_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo::channels))); + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) - weight = 0; + if (almost_max_dist >= almost_dist2weight.size()) + almost_dist2weight.resize(almost_max_dist + ALLOC_CHUNK); - almost_dist2weight[almost_dist] = weight; + almost_dist2weight[almost_max_dist++] = weight; } + almost_dist2weight.resize(almost_max_dist); CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end @@ -173,6 +169,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang int row_from = range.start; int row_to = range.end - 1; + int almost_max_dist = almost_dist2weight.size(); + Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization @@ -273,7 +271,8 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang { int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); - IT weight = almost_dist2weight[almostAvgDist]; + IT weight = + almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x];