From d588c717da1ad2b77e03b058a281da3c00ba0327 Mon Sep 17 00:00:00 2001
From: Erik Karlsson <erik.r.karlsson@gmail.com>
Date: Fri, 13 Feb 2015 00:11:30 +0100
Subject: [PATCH] Use WEIGHT_THRESHOLD to limit the weight table size. Still
 problematic with 16-bit images and large h values.

---
 .../src/fast_nlmeans_denoising_invoker.hpp    | 30 +++++++++----------
 .../fast_nlmeans_multi_denoising_invoker.hpp  | 29 +++++++++---------
 2 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
index 27a016ae98..c9689cabd7 100644
--- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
@@ -123,31 +123,28 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
-    // squared distances are truncated to 24 bits to avoid unreasonable table sizes
-    // TODO: uses lots of memory and loses precision wtih 16-bit images ????
-    const size_t TABLE_MAX_BITS = 24;
     CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
     int template_window_size_sq = template_window_size_ * template_window_size_;
-    almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) +
-        std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
+    almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
     double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
 
+    const double WEIGHT_THRESHOLD = 0.001;
+    const size_t ALLOC_CHUNK = 65536;
     IT max_dist =
         (IT)pixelInfo<T>::sampleMax() * (IT)pixelInfo<T>::sampleMax() * (IT)pixelInfo<T>::channels;
-    int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
-    almost_dist2weight_.resize(almost_max_dist);
-
-    const double WEIGHT_THRESHOLD = 0.001;
-    for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
+    int almost_max_dist = 0;
+    while (true)
     {
-        double dist = almost_dist * almost_dist2actual_dist_multiplier;
+        double dist = almost_max_dist * almost_dist2actual_dist_multiplier;
         IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
+        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break;
 
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
+        if (almost_max_dist >= almost_dist2weight_.size())
+            almost_dist2weight_.resize(almost_max_dist + ALLOC_CHUNK);
 
-        almost_dist2weight_[almost_dist] = weight;
+        almost_dist2weight_[almost_max_dist++] = weight;
     }
+    almost_dist2weight_.resize(almost_max_dist);
     CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
 
     // additional optimization init end
@@ -161,6 +158,8 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
     int row_from = range.start;
     int row_to = range.end - 1;
 
+    int almost_max_dist = almost_dist2weight_.size();
+
     // sums of cols anf rows for current pixel p
     Array2d<IT> dist_sums(search_window_size_, search_window_size_);
 
@@ -244,7 +243,8 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
                 for (int x = 0; x < search_window_size_; x++)
                 {
                     int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_);
-                    IT weight = almost_dist2weight_[almostAvgDist];
+                    IT weight =
+                        almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0;
                     weights_sum += weight;
 
                     T p = cur_row_ptr[border_size_ + search_window_x + x];
diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
index c90249b82e..b4bfc0c6c1 100644
--- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
@@ -131,35 +131,31 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
-    // squared distances are truncated to 24 bits to avoid unreasonable table sizes
-    // TODO: uses lots of memory and loses precision wtih 16-bit images ????
-    const size_t TABLE_MAX_BITS = 24;
     int template_window_size_sq = template_window_size_ * template_window_size_;
     almost_template_window_size_sq_bin_shift = 0;
     while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
         almost_template_window_size_sq_bin_shift++;
-    almost_template_window_size_sq_bin_shift +=
-        std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
 
     int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
     double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
 
+    const double WEIGHT_THRESHOLD = 0.001;
+    const size_t ALLOC_CHUNK = 65536;
     IT max_dist =
         (IT)pixelInfo<T>::sampleMax() * (IT)pixelInfo<T>::sampleMax() * (IT)pixelInfo<T>::channels;
-    int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
-    almost_dist2weight.resize(almost_max_dist);
-
-    const double WEIGHT_THRESHOLD = 0.001;
-    for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
+    int almost_max_dist = 0;
+    while (true)
     {
-        double dist = almost_dist * almost_dist2actual_dist_multiplier;
+        double dist = almost_max_dist * almost_dist2actual_dist_multiplier;
         IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
+        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break;
 
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
+        if (almost_max_dist >= almost_dist2weight.size())
+            almost_dist2weight.resize(almost_max_dist + ALLOC_CHUNK);
 
-        almost_dist2weight[almost_dist] = weight;
+        almost_dist2weight[almost_max_dist++] = weight;
     }
+    almost_dist2weight.resize(almost_max_dist);
     CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
 
     // additional optimization init end
@@ -173,6 +169,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
     int row_from = range.start;
     int row_to = range.end - 1;
 
+    int almost_max_dist = almost_dist2weight.size();
+
     Array3d<IT> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
 
     // for lazy calc optimization
@@ -273,7 +271,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
                     {
                         int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift);
 
-                        IT weight = almost_dist2weight[almostAvgDist];
+                        IT weight =
+                            almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0;
                         weights_sum += weight;
 
                         T p = cur_row_ptr[border_size_ + search_window_x + x];