From cc6f1eb824b37b6a5c9b3b0d303c2ed78402974e Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Sun, 19 Aug 2012 15:56:19 +0400
Subject: [PATCH 1/7] "SimpleFlow" optical flow estimation algorithm (GSoC
 project) declaration in includes, implementation, usage example, test

---
 .../motion_analysis_and_object_tracking.rst   |  44 +
 .../video/include/opencv2/video/tracking.hpp  |  21 +-
 modules/video/src/simpleflow.cpp              | 757 ++++++++++++++++++
 modules/video/src/simpleflow.hpp              | 125 +++
 modules/video/test/test_simpleflow.cpp        | 193 +++++
 samples/cpp/simpleflow_demo.cpp               |  96 +++
 6 files changed, 1235 insertions(+), 1 deletion(-)
 create mode 100644 modules/video/src/simpleflow.cpp
 create mode 100644 modules/video/src/simpleflow.hpp
 create mode 100644 modules/video/test/test_simpleflow.cpp
 create mode 100644 samples/cpp/simpleflow_demo.cpp

diff --git a/modules/video/doc/motion_analysis_and_object_tracking.rst b/modules/video/doc/motion_analysis_and_object_tracking.rst
index 6c196c2ff1..ebb9290cc4 100644
--- a/modules/video/doc/motion_analysis_and_object_tracking.rst
+++ b/modules/video/doc/motion_analysis_and_object_tracking.rst
@@ -597,6 +597,48 @@ Returns background image
 See :ocv:func:`BackgroundSubtractor::getBackgroundImage`.
 
 
+calcOpticalFlowSF
+-----------
+Calculate an optical flow using "SimpleFlow" algorithm.
+
+.. ocv:function:: void calcOpticalFlowSF( Mat& prev, Mat& next, Mat& flowX, Mat& flowY, int layers, int averaging_block_size, int max_flow, double sigma_dist, double sigma_color, int postprocess_window, double sigma_dist_fix, double sigma_color_fix, double occ_thr, int upscale_averaging_radiud, double upscale_sigma_dist, double upscale_sigma_color, double speed_up_thr)
+
+    :param prev: First 8-bit 3-channel image.
+
+    :param next: Second 8-bit 3-channel image 
+
+    :param flowX: X-coordinate of estimated flow
+
+    :param flowY: Y-coordinate of estimated flow
+
+    :param layers: Number of layers
+
+    :param averaging_block_size: Size of block through which we sum up when calculate cost function for pixel
+
+    :param max_flow: maximal flow that we search at each level
+
+    :param sigma_dist: vector smooth spatial sigma parameter
+
+    :param sigma_color: vector smooth color sigma parameter
+
+    :param postprocess_window: window size for postprocess cross bilateral filter
+
+    :param sigma_dist_fix: spatial sigma for postprocess cross bilateralf filter
+
+    :param sigma_color_fix: color sigma for postprocess cross bilateral filter
+
+    :param occ_thr: threshold for detecting occlusions
+
+    :param upscale_averaging_radiud: window size for bilateral upscale operation
+
+    :param upscale_sigma_dist: spatial sigma for bilateral upscale operation
+
+    :param upscale_sigma_color: color sigma for bilateral upscale operation
+
+    :param speed_up_thr: threshold to detect point with irregular flow - where flow should be recalculated after upscale
+
+See [Tao2012]_. And site of project - http://graphics.berkeley.edu/papers/Tao-SAN-2012-05/. 
+
 .. [Bouguet00] Jean-Yves Bouguet. Pyramidal Implementation of the Lucas Kanade Feature Tracker.
 
 .. [Bradski98] Bradski, G.R. "Computer Vision Face Tracking for Use in a Perceptual User Interface", Intel, 1998
@@ -612,3 +654,5 @@ See :ocv:func:`BackgroundSubtractor::getBackgroundImage`.
 .. [Lucas81] Lucas, B., and Kanade, T. An Iterative Image Registration Technique with an Application to Stereo Vision, Proc. of 7th International Joint Conference on Artificial Intelligence (IJCAI), pp. 674-679.
 
 .. [Welch95] Greg Welch and Gary Bishop “An Introduction to the Kalman Filter”, 1995
+
+.. [Tao2012] Michael Tao, Jiamin Bai, Pushmeet Kohli and Sylvain Paris. SimpleFlow: A Non-iterative, Sublinear Optical Flow Algorithm. Computer Graphics Forum (Eurographics 2012)
diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp
index 75668d2289..85c18817a9 100644
--- a/modules/video/include/opencv2/video/tracking.hpp
+++ b/modules/video/include/opencv2/video/tracking.hpp
@@ -326,7 +326,26 @@ CV_EXPORTS_W void calcOpticalFlowFarneback( InputArray prev, InputArray next,
 // that maps one 2D point set to another or one image to another.
 CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst,
                                          bool fullAffine);
-    
+  
+//! computes dense optical flow using Simple Flow algorithm
+CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
+                                    Mat& to, 
+                                    Mat& flowX,
+                                    Mat& flowY,
+                                    int layers,
+                                    int averaging_block_size, 
+                                    int max_flow,
+                                    double sigma_dist,
+                                    double sigma_color,
+                                    int postprocess_window, 
+                                    double sigma_dist_fix, 
+                                    double sigma_color_fix,
+                                    double occ_thr,
+                                    int upscale_averaging_radius,
+                                    double upscale_sigma_dist,
+                                    double upscale_sigma_color,
+                                    double speed_up_thr);
+
 }
 
 #endif
diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
new file mode 100644
index 0000000000..1fda3618c9
--- /dev/null
+++ b/modules/video/src/simpleflow.cpp
@@ -0,0 +1,757 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "simpleflow.hpp"
+
+//
+// 2D dense optical flow algorithm from the following paper:
+// Michael Tao, Jiamin Bai, Pushmeet Kohli, and Sylvain Paris.
+// "SimpleFlow: A Non-iterative, Sublinear Optical Flow Algorithm"
+// Computer Graphics Forum (Eurographics 2012)
+// http://graphics.berkeley.edu/papers/Tao-SAN-2012-05/
+//
+
+namespace cv
+{
+
+WeightedCrossBilateralFilter::WeightedCrossBilateralFilter(
+  const Mat& _image,
+  int _windowSize,
+  double _sigmaDist,
+  double _sigmaColor)
+  : image(_image), 
+    windowSize(_windowSize), 
+    sigmaDist(_sigmaDist), 
+    sigmaColor(_sigmaColor) {
+    
+  expDist.resize(2*windowSize*windowSize+1);
+  const double sigmaDistSqr = 2 * sigmaDist * sigmaDist;
+  for (int i = 0; i <= 2*windowSize*windowSize; ++i) {
+    expDist[i] = exp(-i/sigmaDistSqr);
+  }
+
+  const double sigmaColorSqr = 2 * sigmaColor * sigmaColor;
+  wc.resize(image.rows);
+  for (int row = 0; row < image.rows; ++row) {
+    wc[row].resize(image.cols);
+    for (int col = 0; col < image.cols; ++col) {
+      int beginRow = max(0, row - windowSize);
+      int beginCol = max(0, col - windowSize);
+      int endRow = min(image.rows - 1, row + windowSize);
+      int endCol = min(image.cols - 1, col + windowSize);
+      wc[row][col] = build<double>(endRow - beginRow + 1, endCol - beginCol + 1);
+
+      for (int r = beginRow; r <= endRow; ++r) {
+        for (int c = beginCol; c <= endCol; ++c) {
+          wc[row][col][r - beginRow][c - beginCol] = 
+            exp(-dist(image.at<Vec3b>(row, col), 
+                      image.at<Vec3b>(r, c)) 
+                / sigmaColorSqr);
+        }
+      }
+    }
+  }
+}
+
+Mat WeightedCrossBilateralFilter::apply(Mat& matrix, Mat& weights) {
+  int rows = matrix.rows;
+  int cols = matrix.cols;
+
+  Mat result = Mat::zeros(rows, cols, CV_64F);
+  for (int row = 0; row < rows; ++row) {
+    for(int col = 0; col < cols; ++col) {
+      result.at<double>(row, col) = 
+        convolution(matrix, row, col, weights);
+    }
+  }
+  return result;
+}
+
+double WeightedCrossBilateralFilter::convolution(Mat& matrix, 
+                                                 int row, int col, 
+                                                 Mat& weights) {
+  double result = 0, weightsSum = 0;
+  int beginRow = max(0, row - windowSize);
+  int beginCol = max(0, col - windowSize);
+  int endRow = min(matrix.rows - 1, row + windowSize);
+  int endCol = min(matrix.cols - 1, col + windowSize);
+  for (int r = beginRow; r <= endRow; ++r) {
+    double* ptr = matrix.ptr<double>(r);
+    for (int c = beginCol; c <= endCol; ++c) {
+      const double w = expDist[dist(row, col, r, c)] *
+                       wc[row][col][r - beginRow][c - beginCol] *
+                       weights.at<double>(r, c);
+      result += ptr[c] * w;
+      weightsSum += w;
+    }
+  }
+  return result / weightsSum;
+}
+
+static void removeOcclusions(const Flow& flow, 
+                             const Flow& flow_inv,
+                             double occ_thr,
+                             Mat& confidence) {
+  const int rows = flow.u.rows;
+  const int cols = flow.v.cols;
+  int occlusions = 0;
+  for (int r = 0; r < rows; ++r) {
+    for (int c = 0; c < cols; ++c) {
+      if (dist(flow.u.at<double>(r, c), flow.v.at<double>(r, c),
+               -flow_inv.u.at<double>(r, c), -flow_inv.v.at<double>(r, c)) > occ_thr) {
+        confidence.at<double>(r, c) = 0;
+        occlusions++;
+      }
+    }
+  }
+}
+
+static Mat wd(int top_shift, int bottom_shift, int left_shift, int right_shift, double sigma) {
+  const double factor = 1.0 / (2.0 * sigma * sigma);
+  Mat d = Mat(top_shift + bottom_shift + 1, right_shift + left_shift + 1, CV_64F);
+  for (int dr = -top_shift, r = 0; dr <= bottom_shift; ++dr, ++r) {
+    for (int dc = -left_shift, c = 0; dc <= right_shift; ++dc, ++c) {
+      d.at<double>(r, c) = -(dr*dr + dc*dc) * factor;
+    }
+  }
+  Mat ed;
+  exp(d, ed);
+  return ed;
+}
+
+static Mat wc(const Mat& image, int r0, int c0, int top_shift, int bottom_shift, int left_shift, int right_shift, double sigma) {
+  const double factor = 1.0 / (2.0 * sigma * sigma);
+  Mat d = Mat(top_shift + bottom_shift + 1, right_shift + left_shift + 1, CV_64F);
+  for (int dr = r0-top_shift, r = 0; dr <= r0+bottom_shift; ++dr, ++r) {
+    for (int dc = c0-left_shift, c = 0; dc <= c0+right_shift; ++dc, ++c) {
+      d.at<double>(r, c) = -dist(image.at<Vec3b>(r0, c0), image.at<Vec3b>(dr, dc)) * factor;
+    }
+  }
+  Mat ed;
+  exp(d, ed);
+  return ed;
+}
+
+inline static void dist(const Mat& m1, const Mat& m2, Mat& result) {
+  const int rows = m1.rows;
+  const int cols = m1.cols;
+  for (int r = 0; r < rows; ++r) {
+    const Vec3b *m1_row = m1.ptr<Vec3b>(r);
+    const Vec3b *m2_row = m2.ptr<Vec3b>(r);
+    double* row = result.ptr<double>(r);
+    for (int c = 0; c < cols; ++c) {
+      row[c] = dist(m1_row[c], m2_row[c]);
+    }
+  }
+}
+
+static void calcOpticalFlowSingleScaleSF(const Mat& prev, 
+                                         const Mat& next,
+                                         const Mat& mask,
+                                         Flow& flow,
+                                         Mat& confidence,
+                                         int averaging_radius, 
+                                         int max_flow,
+                                         double sigma_dist,
+                                         double sigma_color) {
+  const int rows = prev.rows;
+  const int cols = prev.cols;
+  confidence = Mat::zeros(rows, cols, CV_64F);
+
+  for (int r0 = 0; r0 < rows; ++r0) {
+    for (int c0 = 0; c0 < cols; ++c0) {
+      int u0 = floor(flow.u.at<double>(r0, c0) + 0.5);
+      int v0 = floor(flow.v.at<double>(r0, c0) + 0.5);
+
+      const int min_row_shift = -min(r0 + u0, max_flow);
+      const int max_row_shift = min(rows - 1 - (r0 + u0), max_flow);
+      const int min_col_shift = -min(c0 + v0, max_flow);
+      const int max_col_shift = min(cols - 1 - (c0 + v0), max_flow);
+
+      double min_cost = DBL_MAX, best_u = u0, best_v = v0;
+
+      Mat w_full_window;
+      double w_full_window_sum;
+      Mat diff_storage;
+
+      if (r0 - averaging_radius >= 0 && 
+          r0 + averaging_radius < rows &&
+          c0 - averaging_radius >= 0 &&
+          c0 + averaging_radius < cols &&
+          mask.at<uchar>(r0, c0)) {
+          w_full_window = wd(averaging_radius, 
+                             averaging_radius, 
+                             averaging_radius, 
+                             averaging_radius, 
+                             sigma_dist).mul(
+                          wc(prev, r0, c0, 
+                             averaging_radius, 
+                             averaging_radius, 
+                             averaging_radius, 
+                             averaging_radius, 
+                             sigma_color));
+
+          w_full_window_sum = sum(w_full_window)[0];
+          diff_storage = Mat::zeros(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_64F);
+      }
+
+      bool first_flow_iteration = true;
+      double sum_e, min_e;
+
+      for (int u = min_row_shift; u <= max_row_shift; ++u) {
+        for (int v = min_col_shift; v <= max_col_shift; ++v) {
+          double e = dist(prev.at<Vec3b>(r0, c0), next.at<Vec3b>(r0 + u0 + u, c0 + v0 + v));
+          if (first_flow_iteration) {
+            sum_e = e;
+            min_e = e;
+            first_flow_iteration = false;
+          } else {
+            sum_e += e;
+            min_e = std::min(min_e, e);
+          }
+          if (!mask.at<uchar>(r0, c0)) {
+            continue;
+          }
+
+          const int window_top_shift = min(r0, r0 + u + u0, averaging_radius);
+          const int window_bottom_shift = min(rows - 1 - r0, 
+                                              rows - 1 - (r0 + u + u0), 
+                                              averaging_radius);
+          const int window_left_shift = min(c0, c0 + v + v0, averaging_radius);
+          const int window_right_shift = min(cols - 1 - c0, 
+                                             cols - 1 - (c0 + v + v0), 
+                                             averaging_radius);
+
+          const Range prev_row_range(r0 - window_top_shift, r0 + window_bottom_shift + 1);
+          const Range prev_col_range(c0 - window_left_shift, c0 + window_right_shift + 1);
+
+          const Range next_row_range(r0 + u0 + u - window_top_shift, 
+                                     r0 + u0 + u + window_bottom_shift + 1);
+          const Range next_col_range(c0 + v0 + v - window_left_shift, 
+                                     c0 + v0 + v + window_right_shift + 1); 
+          
+          Mat diff2;
+          Mat w;
+          double w_sum;
+          if (window_top_shift == averaging_radius &&
+              window_bottom_shift == averaging_radius &&
+              window_left_shift == averaging_radius &&
+              window_right_shift == averaging_radius) {
+            w = w_full_window;
+            w_sum = w_full_window_sum;
+            diff2 = diff_storage;
+
+            dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
+          } else {
+            diff2 = Mat::zeros(window_bottom_shift + window_top_shift + 1,  
+                                   window_right_shift + window_left_shift + 1, CV_64F);
+            
+            dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
+
+            w = wd(window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_dist).mul( 
+                wc(prev, r0, c0, window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_color));
+            w_sum = sum(w)[0];
+          }
+          multiply(diff2, w, diff2);
+      
+          const double cost = sum(diff2)[0] / w_sum;
+          if (cost < min_cost) {
+            min_cost = cost;
+            best_u = u + u0;
+            best_v = v + v0;
+          }
+        }
+      }
+      int square = (max_row_shift - min_row_shift + 1) *
+                   (max_col_shift - min_col_shift + 1);
+      confidence.at<double>(r0, c0) = (square == 0) ? 0
+                                                   : sum_e / square - min_e;
+      if (mask.at<uchar>(r0, c0)) {
+        flow.u.at<double>(r0, c0) = best_u;
+        flow.v.at<double>(r0, c0) = best_v;
+      }
+    }
+  }
+}
+
+static Flow upscaleOpticalFlow(int new_rows, 
+                               int new_cols,
+                               const Mat& image,
+                               const Mat& confidence,
+                               const Flow& flow,
+                               int averaging_radius,
+                               double sigma_dist,
+                               double sigma_color) {
+  const int rows = image.rows;
+  const int cols = image.cols;
+  Flow new_flow(new_rows, new_cols);
+  for (int r = 0; r < rows; ++r) {
+    for (int c = 0; c < cols; ++c) {
+      const int window_top_shift = min(r, averaging_radius);
+      const int window_bottom_shift = min(rows - 1 - r, averaging_radius);
+      const int window_left_shift = min(c, averaging_radius);
+      const int window_right_shift = min(cols - 1 - c, averaging_radius);
+      
+      const Range row_range(r - window_top_shift, r + window_bottom_shift + 1);
+      const Range col_range(c - window_left_shift, c + window_right_shift + 1);
+      
+      const Mat w = confidence(row_range, col_range).mul(
+        wd(window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_dist)).mul( 
+        wc(image, r, c, window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_color));
+
+      const double w_sum = sum(w)[0];
+      double new_u, new_v;
+      if (fabs(w_sum) < 1e-9) {
+        new_u = flow.u.at<double>(r, c);
+        new_v = flow.v.at<double>(r, c);
+      } else {
+        new_u = sum(flow.u(row_range, col_range).mul(w))[0] / w_sum;
+        new_v = sum(flow.v(row_range, col_range).mul(w))[0] / w_sum;
+      }
+      
+      for (int dr = 0; dr <= 1; ++dr) {
+        int nr = 2*r + dr;
+        for (int dc = 0; dc <= 1; ++dc) {
+          int nc = 2*c + dc;
+          if (nr < new_rows && nc < new_cols) {
+            new_flow.u.at<double>(nr, nc) = 2 * new_u;
+            new_flow.v.at<double>(nr, nc) = 2 * new_v;
+          }
+        }
+      }
+    }
+  }
+  return new_flow;
+}
+
+static Mat calcIrregularityMat(const Flow& flow, int radius) {
+  const int rows = flow.u.rows;
+  const int cols = flow.v.cols;
+  Mat irregularity = Mat::zeros(rows, cols, CV_64F);
+  for (int r = 0; r < rows; ++r) {
+    const int start_row = max(0, r - radius);
+    const int end_row = min(rows - 1, r + radius);
+    for (int c = 0; c < cols; ++c) {
+      const int start_col = max(0, c - radius);
+      const int end_col = min(cols - 1, c + radius);
+      for (int dr = start_row; dr <= end_row; ++dr) {
+        for (int dc = start_col; dc <= end_col; ++dc) {
+          const double diff = dist(flow.u.at<double>(r, c), flow.v.at<double>(r, c), 
+                                   flow.u.at<double>(dr, dc), flow.v.at<double>(dr, dc));
+          if (diff > irregularity.at<double>(r, c)) {
+            irregularity.at<double>(r, c) = diff;
+          }
+        }
+      }
+    }
+  }
+  return irregularity;
+}
+
+static void selectPointsToRecalcFlow(const Flow& flow, 
+                                     int irregularity_metric_radius,
+                                     int speed_up_thr,
+                                     int curr_rows,
+                                     int curr_cols,
+                                     const Mat& prev_speed_up,
+                                     Mat& speed_up,
+                                     Mat& mask) {
+  const int prev_rows = flow.u.rows;
+  const int prev_cols = flow.v.cols;
+
+  Mat is_flow_regular = calcIrregularityMat(flow, 
+                                                irregularity_metric_radius)
+                              < speed_up_thr;
+  Mat done = Mat::zeros(prev_rows, prev_cols, CV_8U);
+  speed_up = Mat::zeros(curr_rows, curr_cols, CV_8U);
+  mask = Mat::zeros(curr_rows, curr_cols, CV_8U);
+
+  for (int r = 0; r < is_flow_regular.rows; ++r) {
+    for (int c = 0; c < is_flow_regular.cols; ++c) {
+      if (!done.at<uchar>(r, c)) {
+        if (is_flow_regular.at<uchar>(r, c) && 
+            2*r + 1 < curr_rows && 2*c + 1< curr_cols) {
+
+          bool all_flow_in_region_regular = true;
+          int speed_up_at_this_point = prev_speed_up.at<uchar>(r, c);
+          int step = (1 << speed_up_at_this_point) - 1;
+          int prev_top = r;
+          int prev_bottom = std::min(r + step, prev_rows - 1);
+          int prev_left = c;
+          int prev_right = std::min(c + step, prev_cols - 1);
+
+          for (int rr = prev_top; rr <= prev_bottom; ++rr) {
+            for (int cc = prev_left; cc <= prev_right; ++cc) {
+              done.at<uchar>(rr, cc) = 1;
+              if (!is_flow_regular.at<uchar>(rr, cc)) {
+                all_flow_in_region_regular = false;
+              }
+            }
+          }
+
+          int curr_top = std::min(2 * r, curr_rows - 1);
+          int curr_bottom = std::min(2*(r + step) + 1, curr_rows - 1);
+          int curr_left = std::min(2 * c, curr_cols - 1);
+          int curr_right = std::min(2*(c + step) + 1, curr_cols - 1);
+
+          if (all_flow_in_region_regular && 
+              curr_top != curr_bottom &&
+              curr_left != curr_right) {
+            mask.at<uchar>(curr_top, curr_left) = MASK_TRUE_VALUE;
+            mask.at<uchar>(curr_bottom, curr_left) = MASK_TRUE_VALUE;
+            mask.at<uchar>(curr_top, curr_right) = MASK_TRUE_VALUE;
+            mask.at<uchar>(curr_bottom, curr_right) = MASK_TRUE_VALUE;
+            for (int rr = curr_top; rr <= curr_bottom; ++rr) {
+              for (int cc = curr_left; cc <= curr_right; ++cc) {
+                speed_up.at<uchar>(rr, cc) = speed_up_at_this_point + 1; 
+              }
+            }
+          } else {
+            for (int rr = curr_top; rr <= curr_bottom; ++rr) {
+              for (int cc = curr_left; cc <= curr_right; ++cc) {
+                mask.at<uchar>(rr, cc) = MASK_TRUE_VALUE;
+              }
+            }
+          }
+        } else {
+          done.at<uchar>(r, c) = 1;
+          for (int dr = 0; dr <= 1; ++dr) {
+            int nr = 2*r + dr;
+            for (int dc = 0; dc <= 1; ++dc) {
+              int nc = 2*c + dc;
+              if (nr < curr_rows && nc < curr_cols) {
+                mask.at<uchar>(nr, nc) = MASK_TRUE_VALUE;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+static inline double extrapolateValueInRect(int height, int width,
+                                            double v11, double v12,
+                                            double v21, double v22,
+                                            int r, int c) {
+  if (r == 0 && c == 0) { return v11;}
+  if (r == 0 && c == width) { return v12;}
+  if (r == height && c == 0) { return v21;}
+  if (r == height && c == width) { return v22;}
+  
+  double qr = double(r) / height;
+  double pr = 1.0 - qr;
+  double qc = double(c) / width;
+  double pc = 1.0 - qc;
+
+  return v11*pr*pc + v12*pr*qc + v21*qr*pc + v22*qc*qr; 
+}
+                                              
+static void extrapolateFlow(Flow& flow,
+                            const Mat& speed_up) {
+  const int rows = flow.u.rows;
+  const int cols = flow.u.cols;
+  Mat done = Mat::zeros(rows, cols, CV_8U);
+  for (int r = 0; r < rows; ++r) {
+    for (int c = 0; c < cols; ++c) {
+      if (!done.at<uchar>(r, c) && speed_up.at<uchar>(r, c) > 1) {
+        int step = (1 << speed_up.at<uchar>(r, c)) - 1;
+        int top = r;
+        int bottom = std::min(r + step, rows - 1);
+        int left = c;
+        int right = std::min(c + step, cols - 1);
+
+        int height = bottom - top;
+        int width = right - left;
+        for (int rr = top; rr <= bottom; ++rr) {
+          for (int cc = left; cc <= right; ++cc) {
+            done.at<uchar>(rr, cc) = 1;
+            flow.u.at<double>(rr, cc) = extrapolateValueInRect(
+                                          height, width, 
+                                          flow.u.at<double>(top, left),
+                                          flow.u.at<double>(top, right),
+                                          flow.u.at<double>(bottom, left),
+                                          flow.u.at<double>(bottom, right),
+                                          rr-top, cc-left); 
+
+            flow.v.at<double>(rr, cc) = extrapolateValueInRect(
+                                          height, width, 
+                                          flow.v.at<double>(top, left),
+                                          flow.v.at<double>(top, right),
+                                          flow.v.at<double>(bottom, left),
+                                          flow.v.at<double>(bottom, right),
+                                          rr-top, cc-left); 
+          }
+        }
+      }
+    }
+  }
+}
+
+static void buildPyramidWithResizeMethod(Mat& src,
+                                  vector<Mat>& pyramid,
+                                  int layers,
+                                  int interpolation_type) {
+  pyramid.push_back(src);
+  for (int i = 1; i <= layers; ++i) {
+    Mat prev = pyramid[i - 1];
+    if (prev.rows <= 1 || prev.cols <= 1) {
+      break;
+    }
+
+    Mat next;
+    resize(prev, next, Size((prev.cols + 1) / 2, (prev.rows + 1) / 2), 0, 0, interpolation_type);
+    pyramid.push_back(next);
+  }
+}
+
+static Flow calcOpticalFlowSF(Mat& from, 
+                       Mat& to, 
+                       int layers,
+                       int averaging_block_size, 
+                       int max_flow,
+                       double sigma_dist,
+                       double sigma_color,
+                       int postprocess_window, 
+                       double sigma_dist_fix, 
+                       double sigma_color_fix,
+                       double occ_thr,
+                       int upscale_averaging_radius,
+                       double upscale_sigma_dist,
+                       double upscale_sigma_color,
+                       double speed_up_thr) {
+  vector<Mat> pyr_from_images;
+  vector<Mat> pyr_to_images;
+
+  buildPyramidWithResizeMethod(from, pyr_from_images, layers - 1, INTER_CUBIC);
+  buildPyramidWithResizeMethod(to, pyr_to_images, layers - 1, INTER_CUBIC);
+//  buildPyramid(from, pyr_from_images, layers - 1, BORDER_WRAP);
+//  buildPyramid(to, pyr_to_images, layers - 1, BORDER_WRAP);
+
+  if ((int)pyr_from_images.size() != layers) {
+      exit(1);
+  }
+
+  if ((int)pyr_to_images.size() != layers) {
+      exit(1);
+  }
+
+  Mat first_from_image = pyr_from_images[layers - 1];
+  Mat first_to_image = pyr_to_images[layers - 1];
+
+  Mat mask = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
+  Mat mask_inv = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
+
+  Flow flow(first_from_image.rows, first_from_image.cols);
+  Flow flow_inv(first_to_image.rows, first_to_image.cols);
+
+  Mat confidence;
+  Mat confidence_inv;
+
+  calcOpticalFlowSingleScaleSF(first_from_image, 
+                               first_to_image, 
+                               mask,
+                               flow,
+                               confidence,
+                               averaging_block_size, 
+                               max_flow, 
+                               sigma_dist, 
+                               sigma_color);
+
+  calcOpticalFlowSingleScaleSF(first_to_image, 
+                               first_from_image, 
+                               mask_inv,
+                               flow_inv,
+                               confidence_inv,
+                               averaging_block_size, 
+                               max_flow, 
+                               sigma_dist, 
+                               sigma_color);
+
+  removeOcclusions(flow, 
+                   flow_inv,
+                   occ_thr,
+                   confidence);
+
+  removeOcclusions(flow_inv, 
+                   flow,
+                   occ_thr,
+                   confidence_inv);
+
+  Mat speed_up = Mat::zeros(first_from_image.rows, first_from_image.cols, CV_8U);
+  Mat speed_up_inv = Mat::zeros(first_from_image.rows, first_from_image.cols, CV_8U);
+
+  for (int curr_layer = layers - 2; curr_layer >= 0; --curr_layer) {
+    const Mat curr_from = pyr_from_images[curr_layer];
+    const Mat curr_to = pyr_to_images[curr_layer];
+    const Mat prev_from = pyr_from_images[curr_layer + 1];
+    const Mat prev_to = pyr_to_images[curr_layer + 1];
+
+    const int curr_rows = curr_from.rows;
+    const int curr_cols = curr_from.cols;
+
+    Mat new_speed_up, new_speed_up_inv;
+
+    selectPointsToRecalcFlow(flow,
+                             averaging_block_size,
+                             speed_up_thr,
+                             curr_rows,
+                             curr_cols,
+                             speed_up,
+                             new_speed_up,
+                             mask);
+
+    int points_to_recalculate = sum(mask)[0] / MASK_TRUE_VALUE;
+
+    selectPointsToRecalcFlow(flow_inv,
+                             averaging_block_size,
+                             speed_up_thr,
+                             curr_rows,
+                             curr_cols,
+                             speed_up_inv,
+                             new_speed_up_inv,
+                             mask_inv);
+
+    points_to_recalculate = sum(mask_inv)[0] / MASK_TRUE_VALUE;
+
+    speed_up = new_speed_up;
+    speed_up_inv = new_speed_up_inv;
+
+    flow = upscaleOpticalFlow(curr_rows,
+                              curr_cols,
+                              prev_from,
+                              confidence,
+                              flow, 
+                              upscale_averaging_radius,
+                              upscale_sigma_dist,
+                              upscale_sigma_color);
+
+    flow_inv = upscaleOpticalFlow(curr_rows,
+                                  curr_cols,
+                                  prev_to,
+                                  confidence_inv,
+                                  flow_inv,
+                                  upscale_averaging_radius,
+                                  upscale_sigma_dist,
+                                  upscale_sigma_color);
+
+    calcOpticalFlowSingleScaleSF(curr_from, 
+                                 curr_to, 
+                                 mask,
+                                 flow,
+                                 confidence,
+                                 averaging_block_size, 
+                                 max_flow, 
+                                 sigma_dist, 
+                                 sigma_color);
+
+    calcOpticalFlowSingleScaleSF(curr_to,
+                                 curr_from, 
+                                 mask_inv,
+                                 flow_inv,
+                                 confidence_inv,
+                                 averaging_block_size, 
+                                 max_flow, 
+                                 sigma_dist, 
+                                 sigma_color);
+
+    extrapolateFlow(flow, speed_up);
+    extrapolateFlow(flow_inv, speed_up_inv);
+
+    removeOcclusions(flow, flow_inv, occ_thr, confidence);
+    removeOcclusions(flow_inv, flow, occ_thr, confidence_inv);
+  }
+
+  WeightedCrossBilateralFilter filter_postprocess(pyr_from_images[0], 
+                                                  postprocess_window,
+                                                  sigma_dist_fix,
+                                                  sigma_color_fix);
+
+  flow.u = filter_postprocess.apply(flow.u, confidence);
+  flow.v = filter_postprocess.apply(flow.v, confidence);
+
+  Mat blured_u, blured_v;
+  GaussianBlur(flow.u, blured_u, Size(3, 3), 5);
+  GaussianBlur(flow.v, blured_v, Size(3, 3), 5);
+
+  return Flow(blured_v, blured_u);
+}
+
+void calcOpticalFlowSF(Mat& from, 
+                       Mat& to, 
+                       Mat& flowX, 
+                       Mat& flowY,
+                       int layers,
+                       int averaging_block_size, 
+                       int max_flow,
+                       double sigma_dist,
+                       double sigma_color,
+                       int postprocess_window, 
+                       double sigma_dist_fix, 
+                       double sigma_color_fix,
+                       double occ_thr,
+                       int upscale_averaging_radius,
+                       double upscale_sigma_dist,
+                       double upscale_sigma_color,
+                       double speed_up_thr) {
+
+  Flow flow = calcOpticalFlowSF(from, to, 
+                                layers,
+                                averaging_block_size,
+                                max_flow,
+                                sigma_dist,
+                                sigma_color,
+                                postprocess_window,
+                                sigma_dist_fix,
+                                sigma_color_fix,
+                                occ_thr,
+                                upscale_averaging_radius,
+                                upscale_sigma_dist,
+                                upscale_sigma_color,
+                                speed_up_thr);
+  flowX = flow.u;
+  flowY = flow.v;
+}
+
+}
+
diff --git a/modules/video/src/simpleflow.hpp b/modules/video/src/simpleflow.hpp
new file mode 100644
index 0000000000..55052fd05b
--- /dev/null
+++ b/modules/video/src/simpleflow.hpp
@@ -0,0 +1,125 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_SIMPLEFLOW_H__
+#define __OPENCV_SIMPLEFLOW_H__
+
+#include <vector>
+
+using namespace std;
+
+#define MASK_TRUE_VALUE 255
+#define UNKNOWN_FLOW_THRESH 1e9
+
+namespace cv {
+
+struct Flow {
+  Mat u, v;
+
+  Flow() {;}
+
+  Flow(Mat& _u, Mat& _v)
+    : u(_u), v(_v) {;}
+  
+  Flow(int rows, int cols) {
+    u = Mat::zeros(rows, cols, CV_64F);
+    v = Mat::zeros(rows, cols, CV_64F);
+  }
+};
+
+inline static double dist(const Vec3b& p1, const Vec3b& p2) {
+  return (p1[0] - p2[0]) * (p1[0] - p2[0]) +
+         (p1[1] - p2[1]) * (p1[1] - p2[1]) +
+         (p1[2] - p2[2]) * (p1[2] - p2[2]);
+}
+
+inline static double dist(const Point2f& p1, const Point2f& p2) {
+  return (p1.x - p2.x) * (p1.x - p2.x) +
+         (p1.y - p2.y) * (p1.y - p2.y);
+}
+
+inline static double dist(double x1, double y1, double x2, double y2) {
+  return (x1 - x2) * (x1 - x2) +
+         (y1 - y2) * (y1 - y2);
+}
+
+inline static int dist(int x1, int y1, int x2, int y2) {
+  return (x1 - x2) * (x1 - x2) +
+         (y1 - y2) * (y1 - y2);
+}
+
+template<class T>
+inline static T min(T t1, T t2, T t3) {
+  return (t1 <= t2 && t1 <= t3) ? t1 : min(t2, t3);
+}
+
+template<class T>
+vector<vector<T> > build(int n, int m) {
+  vector<vector<T> > res(n);
+  for (int i = 0; i < n; ++i) {
+    res[i].resize(m, 0);
+  }
+  return res;
+}
+
+class WeightedCrossBilateralFilter {
+public:
+  WeightedCrossBilateralFilter(const Mat& _image,
+                       int _windowSize,
+                       double _sigmaDist,
+                       double _sigmaColor);
+
+  Mat apply(Mat& matrix, Mat& weights);
+
+private:
+  double convolution(Mat& matrix, int row, int col, Mat& weights);
+
+  Mat image;
+  int windowSize;
+  double sigmaDist, sigmaColor;
+
+  vector<double> expDist;
+  vector<vector<vector<vector<double> > > > wc;
+};
+}
+
+#endif
diff --git a/modules/video/test/test_simpleflow.cpp b/modules/video/test/test_simpleflow.cpp
new file mode 100644
index 0000000000..186ba8f56d
--- /dev/null
+++ b/modules/video/test/test_simpleflow.cpp
@@ -0,0 +1,193 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#include <string>
+
+using namespace std;
+
+/* ///////////////////// simpleflow_test ///////////////////////// */
+
+class CV_SimpleFlowTest : public cvtest::BaseTest
+{
+public:
+    CV_SimpleFlowTest();
+protected:
+    void run(int);
+};
+
+
+CV_SimpleFlowTest::CV_SimpleFlowTest() {}
+
+static void readOpticalFlowFromFile(FILE* file, cv::Mat& flowX, cv::Mat& flowY) {
+  char header[5];
+  if (fread(header, 1, 4, file) < 4 && (string)header != "PIEH") {
+    return;
+  }
+
+  int cols, rows;
+  if (fread(&cols, sizeof(int), 1, file) != 1||
+      fread(&rows, sizeof(int), 1, file) != 1) {
+    return;
+  }
+
+  flowX = cv::Mat::zeros(rows, cols, CV_64F);
+  flowY = cv::Mat::zeros(rows, cols, CV_64F);
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      float uPoint, vPoint;
+      if (fread(&uPoint, sizeof(float), 1, file) != 1 ||
+          fread(&vPoint, sizeof(float), 1, file) != 1) {
+        flowX.release();
+        flowY.release();
+        return;
+      }
+  
+      flowX.at<double>(i, j) = uPoint;
+      flowY.at<double>(i, j) = vPoint;
+    }
+  }
+}
+
+static bool isFlowCorrect(double u) {
+  return !isnan(u) && (fabs(u) < 1e9);
+}
+
+static double calc_rmse(cv::Mat flow1X, cv::Mat flow1Y, cv::Mat flow2X, cv::Mat flow2Y) {
+  long double sum;
+  int counter = 0;
+  const int rows = flow1X.rows;
+  const int cols = flow1X.cols;
+
+  for (int y = 0; y < rows; ++y) {
+    for (int x = 0; x < cols; ++x) {
+      double u1 = flow1X.at<double>(y, x);
+      double v1 = flow1Y.at<double>(y, x);
+      double u2 = flow2X.at<double>(y, x);
+      double v2 = flow2Y.at<double>(y, x);
+      if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) {
+        sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2);
+        counter++;
+      }
+    }
+  }
+  return sqrt((double)sum / (1e-9 + counter));
+}
+
+void CV_SimpleFlowTest::run(int) {
+    int code = cvtest::TS::OK;
+    
+    const double MAX_RMSE = 0.6;
+    const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png";
+    const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png";
+    const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo";
+
+    cv::Mat frame1 = cv::imread(frame1_path);
+    cv::Mat frame2 = cv::imread(frame2_path);
+
+    if (frame1.empty()) {
+      ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+    
+    if (frame2.empty()) {
+      ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+
+    if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
+      ts->printf(cvtest::TS::LOG, "images should be of equal sizes (%s and %s)",
+                 frame1_path.c_str(), frame2_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+
+    if (frame1.type() != 16 || frame2.type() != 16) {
+      ts->printf(cvtest::TS::LOG, "images should be of equal type CV_8UC3 (%s and %s)",
+                 frame1_path.c_str(), frame2_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+
+    cv::Mat flowX_gt, flowY_gt;
+
+    FILE* gt_flow_file = fopen(gt_flow_path.c_str(), "rb");
+    if (gt_flow_file == NULL) {
+      ts->printf(cvtest::TS::LOG, "could not read ground-thuth flow from file %s",
+                 gt_flow_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+    readOpticalFlowFromFile(gt_flow_file, flowX_gt, flowY_gt);
+    if (flowX_gt.empty() || flowY_gt.empty()) {
+      ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s",
+                 gt_flow_path.c_str());
+      ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
+      return;
+    }
+    fclose(gt_flow_file);
+
+    cv::Mat flowX, flowY;
+    cv::calcOpticalFlowSF(frame1, frame2, 
+                          flowX, flowY,
+                          3, 4, 2, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+
+    double rmse = calc_rmse(flowX_gt, flowY_gt, flowX, flowY);
+    
+    ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SimpleFlow algorithm : %lf\n",
+               rmse);
+
+    if (rmse > MAX_RMSE) {
+      ts->printf( cvtest::TS::LOG,
+                 "Too big rmse error : %lf ( >= %lf )\n", rmse, MAX_RMSE);
+      ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
+      return;
+    }
+}
+
+
+TEST(Video_OpticalFlowSimpleFlow, accuracy) { CV_SimpleFlowTest test; test.safe_run(); }
+
+/* End of file. */
diff --git a/samples/cpp/simpleflow_demo.cpp b/samples/cpp/simpleflow_demo.cpp
new file mode 100644
index 0000000000..6a195fe894
--- /dev/null
+++ b/samples/cpp/simpleflow_demo.cpp
@@ -0,0 +1,96 @@
+#include "opencv2/video/tracking.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+#include <cstdio>
+#include <iostream>
+
+using namespace cv;
+using namespace std;
+
+static void help()
+{
+    // print a welcome message, and the OpenCV version
+    printf("This is a demo of SimpleFlow optical flow algorithm,\n"
+           "Using OpenCV version %s\n\n", CV_VERSION);
+
+    printf("Usage: simpleflow_demo frame1 frame2 output_flow"
+           "\nApplication will write estimated flow "
+           "\nbetween 'frame1' and 'frame2' in binary format"
+           "\ninto file 'output_flow'"
+           "\nThen one can use code from http://vision.middlebury.edu/flow/data/"
+           "\nto convert flow in binary file to image\n");
+}
+
+// binary file format for flow data specified here:
+// http://vision.middlebury.edu/flow/data/
+static void writeOpticalFlowToFile(const Mat& u, const Mat& v, FILE* file) {
+  int cols = u.cols;
+  int rows = u.rows;
+  
+  fprintf(file, "PIEH");
+   
+  if (fwrite(&cols, sizeof(int), 1, file) != 1 ||
+      fwrite(&rows, sizeof(int), 1, file) != 1) {
+    fprintf(stderr, "writeOpticalFlowToFile : problem writing header\n");
+    exit(1);
+  }
+
+  for (int i= 0; i < u.rows; ++i) {
+    for (int j = 0; j < u.cols; ++j) {
+      float uPoint = u.at<double>(i, j);
+      float vPoint = v.at<double>(i, j);
+
+      if (fwrite(&uPoint, sizeof(float), 1, file) != 1 ||
+          fwrite(&vPoint, sizeof(float), 1, file) != 1) {
+        fprintf(stderr, "writeOpticalFlowToFile : problem writing data\n");
+        exit(1);
+      }
+    }
+  }
+}
+int main(int argc, char** argv) {
+    help();
+
+    if (argc < 4) {
+      fprintf(stderr, "Wrong number of command line arguments : %d (expected %d)\n", argc, 4);
+      exit(1);
+    }
+    
+    Mat frame1 = imread(argv[1]);
+    Mat frame2 = imread(argv[2]);
+
+    if (frame1.empty() || frame2.empty()) {
+      fprintf(stderr, "simpleflow_demo : Images cannot be read\n");
+      exit(1);
+    }
+
+    if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
+      fprintf(stderr, "simpleflow_demo : Images should be of equal sizes\n");
+      exit(1);
+    }
+
+    if (frame1.type() != 16 || frame2.type() != 16) {
+      fprintf(stderr, "simpleflow_demo : Images should be of equal type CV_8UC3\n");
+      exit(1);
+    }
+
+    printf("simpleflow_demo : Read two images of size [rows = %d, cols = %d]\n", 
+           frame1.rows, frame1.cols);
+
+    Mat flowX, flowY;
+
+    calcOpticalFlowSF(frame1, frame2, 
+                      flowX, flowY,
+                      3, 2, 4, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+
+  FILE* file = fopen(argv[3], "wb");
+  if (file == NULL) {
+    fprintf(stderr, "simpleflow_demo : Unable to open file '%s' for writing\n", argv[3]); 
+    exit(1);
+  }
+  printf("simpleflow_demo : Writing to file\n");
+  writeOpticalFlowToFile(flowX, flowY, file);
+  fclose(file);
+  return 0;
+}

From 5ee632fe90a5425532588bdd8b60e25c847b317b Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Mon, 27 Aug 2012 14:40:57 +0400
Subject: [PATCH 2/7] Updates for SimpleFlow algorithm

+ New format for flow data - CV_32C2
+ Memory optimization
+ Cross Bilateral Filter optimization
+ Minor optimizations
+ Sample for calcOpticalFlowSF improved
---
 .../video/include/opencv2/video/tracking.hpp  |   5 +-
 modules/video/src/simpleflow.cpp              | 450 +++++++-----------
 modules/video/src/simpleflow.hpp              |  53 +--
 modules/video/test/test_simpleflow.cpp        |  63 +--
 samples/cpp/simpleflow_demo.cpp               | 229 ++++++---
 5 files changed, 384 insertions(+), 416 deletions(-)

diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp
index 85c18817a9..6800c63373 100644
--- a/modules/video/include/opencv2/video/tracking.hpp
+++ b/modules/video/include/opencv2/video/tracking.hpp
@@ -329,9 +329,8 @@ CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst,
   
 //! computes dense optical flow using Simple Flow algorithm
 CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
-                                    Mat& to, 
-                                    Mat& flowX,
-                                    Mat& flowY,
+                                    Mat& to,
+                                    Mat& flow,
                                     int layers,
                                     int averaging_block_size, 
                                     int max_flow,
diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index 1fda3618c9..0cd320d6ee 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -54,193 +54,154 @@
 namespace cv
 {
 
-WeightedCrossBilateralFilter::WeightedCrossBilateralFilter(
-  const Mat& _image,
-  int _windowSize,
-  double _sigmaDist,
-  double _sigmaColor)
-  : image(_image), 
-    windowSize(_windowSize), 
-    sigmaDist(_sigmaDist), 
-    sigmaColor(_sigmaColor) {
-    
-  expDist.resize(2*windowSize*windowSize+1);
-  const double sigmaDistSqr = 2 * sigmaDist * sigmaDist;
-  for (int i = 0; i <= 2*windowSize*windowSize; ++i) {
-    expDist[i] = exp(-i/sigmaDistSqr);
-  }
-
-  const double sigmaColorSqr = 2 * sigmaColor * sigmaColor;
-  wc.resize(image.rows);
-  for (int row = 0; row < image.rows; ++row) {
-    wc[row].resize(image.cols);
-    for (int col = 0; col < image.cols; ++col) {
-      int beginRow = max(0, row - windowSize);
-      int beginCol = max(0, col - windowSize);
-      int endRow = min(image.rows - 1, row + windowSize);
-      int endCol = min(image.cols - 1, col + windowSize);
-      wc[row][col] = build<double>(endRow - beginRow + 1, endCol - beginCol + 1);
-
-      for (int r = beginRow; r <= endRow; ++r) {
-        for (int c = beginCol; c <= endCol; ++c) {
-          wc[row][col][r - beginRow][c - beginCol] = 
-            exp(-dist(image.at<Vec3b>(row, col), 
-                      image.at<Vec3b>(r, c)) 
-                / sigmaColorSqr);
-        }
-      }
-    }
-  }
-}
-
-Mat WeightedCrossBilateralFilter::apply(Mat& matrix, Mat& weights) {
-  int rows = matrix.rows;
-  int cols = matrix.cols;
-
-  Mat result = Mat::zeros(rows, cols, CV_64F);
-  for (int row = 0; row < rows; ++row) {
-    for(int col = 0; col < cols; ++col) {
-      result.at<double>(row, col) = 
-        convolution(matrix, row, col, weights);
-    }
-  }
-  return result;
-}
-
-double WeightedCrossBilateralFilter::convolution(Mat& matrix, 
-                                                 int row, int col, 
-                                                 Mat& weights) {
-  double result = 0, weightsSum = 0;
-  int beginRow = max(0, row - windowSize);
-  int beginCol = max(0, col - windowSize);
-  int endRow = min(matrix.rows - 1, row + windowSize);
-  int endCol = min(matrix.cols - 1, col + windowSize);
-  for (int r = beginRow; r <= endRow; ++r) {
-    double* ptr = matrix.ptr<double>(r);
-    for (int c = beginCol; c <= endCol; ++c) {
-      const double w = expDist[dist(row, col, r, c)] *
-                       wc[row][col][r - beginRow][c - beginCol] *
-                       weights.at<double>(r, c);
-      result += ptr[c] * w;
-      weightsSum += w;
-    }
-  }
-  return result / weightsSum;
-}
-
-static void removeOcclusions(const Flow& flow, 
-                             const Flow& flow_inv,
-                             double occ_thr,
+static void removeOcclusions(const Mat& flow, 
+                             const Mat& flow_inv,
+                             float occ_thr,
                              Mat& confidence) {
-  const int rows = flow.u.rows;
-  const int cols = flow.v.cols;
-  int occlusions = 0;
+  const int rows = flow.rows;
+  const int cols = flow.cols;
   for (int r = 0; r < rows; ++r) {
     for (int c = 0; c < cols; ++c) {
-      if (dist(flow.u.at<double>(r, c), flow.v.at<double>(r, c),
-               -flow_inv.u.at<double>(r, c), -flow_inv.v.at<double>(r, c)) > occ_thr) {
-        confidence.at<double>(r, c) = 0;
-        occlusions++;
+      if (dist(flow.at<Vec2f>(r, c), -flow_inv.at<Vec2f>(r, c)) > occ_thr) {
+        confidence.at<float>(r, c) = 0;
+      } else {
+        confidence.at<float>(r, c) = 1;
       }
     }
   }
 }
 
-static Mat wd(int top_shift, int bottom_shift, int left_shift, int right_shift, double sigma) {
-  const double factor = 1.0 / (2.0 * sigma * sigma);
-  Mat d = Mat(top_shift + bottom_shift + 1, right_shift + left_shift + 1, CV_64F);
+static void wd(Mat& d, int top_shift, int bottom_shift, int left_shift, int right_shift, float sigma) {
+  const float factor = 1.0 / (2.0 * sigma * sigma);
   for (int dr = -top_shift, r = 0; dr <= bottom_shift; ++dr, ++r) {
     for (int dc = -left_shift, c = 0; dc <= right_shift; ++dc, ++c) {
-      d.at<double>(r, c) = -(dr*dr + dc*dc) * factor;
+      d.at<float>(r, c) = -(dr*dr + dc*dc) * factor;
     }
   }
-  Mat ed;
-  exp(d, ed);
-  return ed;
+  exp(d, d);
 }
 
-static Mat wc(const Mat& image, int r0, int c0, int top_shift, int bottom_shift, int left_shift, int right_shift, double sigma) {
-  const double factor = 1.0 / (2.0 * sigma * sigma);
-  Mat d = Mat(top_shift + bottom_shift + 1, right_shift + left_shift + 1, CV_64F);
+static void wc(const Mat& image, Mat& d, int r0, int c0, 
+               int top_shift, int bottom_shift, int left_shift, int right_shift, float sigma) {
+  const float factor = 1.0 / (2.0 * sigma * sigma);
+  const Vec3b centeral_point = image.at<Vec3b>(r0, c0);
   for (int dr = r0-top_shift, r = 0; dr <= r0+bottom_shift; ++dr, ++r) {
+    const Vec3b *row = image.ptr<Vec3b>(dr); 
+    float *d_row = d.ptr<float>(r);
     for (int dc = c0-left_shift, c = 0; dc <= c0+right_shift; ++dc, ++c) {
-      d.at<double>(r, c) = -dist(image.at<Vec3b>(r0, c0), image.at<Vec3b>(dr, dc)) * factor;
+      d_row[c] = -dist(centeral_point, row[dc]) * factor;
     }
   }
-  Mat ed;
-  exp(d, ed);
-  return ed;
+  exp(d, d);
 }
 
-inline static void dist(const Mat& m1, const Mat& m2, Mat& result) {
+static void dist(const Mat& m1, const Mat& m2, Mat& result) {
   const int rows = m1.rows;
   const int cols = m1.cols;
   for (int r = 0; r < rows; ++r) {
     const Vec3b *m1_row = m1.ptr<Vec3b>(r);
     const Vec3b *m2_row = m2.ptr<Vec3b>(r);
-    double* row = result.ptr<double>(r);
+    float* row = result.ptr<float>(r);
     for (int c = 0; c < cols; ++c) {
       row[c] = dist(m1_row[c], m2_row[c]);
     }
   }
 }
 
+static void crossBilateralFilter(const Mat& image, const Mat& edge_image, const Mat confidence, Mat& dst, int d, float sigma_color, float sigma_space, bool flag=false) {
+  const int rows = image.rows;
+  const int cols = image.cols;
+  Mat image_extended, edge_image_extended, confidence_extended;
+  copyMakeBorder(image, image_extended, d, d, d, d, BORDER_DEFAULT);
+  copyMakeBorder(edge_image, edge_image_extended, d, d, d, d, BORDER_DEFAULT);
+  copyMakeBorder(confidence, confidence_extended, d, d, d, d, BORDER_CONSTANT, Scalar(0));
+  Mat weights_space(2*d+1, 2*d+1, CV_32F);
+  wd(weights_space, d, d, d, d, sigma_space);
+  Mat weights(2*d+1, 2*d+1, CV_32F);
+  Mat weighted_sum(2*d+1, 2*d+1, CV_32F);
+  
+
+  vector<Mat> image_extended_channels;
+  split(image_extended, image_extended_channels);
+
+  for (int row = 0; row < rows; ++row) {
+    for (int col = 0; col < cols; ++col) {
+      wc(edge_image_extended, weights, row+d, col+d, d, d, d, d, sigma_color);
+
+      Range window_rows(row,row+2*d+1);
+      Range window_cols(col,col+2*d+1);
+
+      multiply(weights, confidence_extended(window_rows, window_cols), weights);
+      multiply(weights, weights_space, weights);
+      float weights_sum = sum(weights)[0];
+
+      for (int ch = 0; ch < 2; ++ch) {
+        multiply(weights, image_extended_channels[ch](window_rows, window_cols), weighted_sum);
+        float total_sum = sum(weighted_sum)[0];
+
+        dst.at<Vec2f>(row, col)[ch] = (flag && fabs(weights_sum) < 1e-9) 
+          ? image.at<float>(row, col) 
+          : total_sum / weights_sum;
+      }
+    }
+  }
+}
+
 static void calcOpticalFlowSingleScaleSF(const Mat& prev, 
                                          const Mat& next,
                                          const Mat& mask,
-                                         Flow& flow,
+                                         Mat& flow,
                                          Mat& confidence,
                                          int averaging_radius, 
                                          int max_flow,
-                                         double sigma_dist,
-                                         double sigma_color) {
+                                         float sigma_dist,
+                                         float sigma_color) {
   const int rows = prev.rows;
   const int cols = prev.cols;
-  confidence = Mat::zeros(rows, cols, CV_64F);
+  confidence = Mat::zeros(rows, cols, CV_32F);
+  
+  Mat diff_storage(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
+  Mat w_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
+  Mat wd_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
+  float w_full_window_sum;
+
+  Mat prev_extended;
+  copyMakeBorder(prev, prev_extended, 
+                 averaging_radius, averaging_radius, averaging_radius, averaging_radius,
+                 BORDER_DEFAULT);
+
+  wd(wd_full_window, averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_dist);
 
   for (int r0 = 0; r0 < rows; ++r0) {
     for (int c0 = 0; c0 < cols; ++c0) {
-      int u0 = floor(flow.u.at<double>(r0, c0) + 0.5);
-      int v0 = floor(flow.v.at<double>(r0, c0) + 0.5);
+      Vec2f flow_at_point = flow.at<Vec2f>(r0, c0);
+      int u0 = floor(flow_at_point[0] + 0.5);
+      if (r0 + u0 < 0) { u0 = -r0; }
+      if (r0 + u0 >= rows) { u0 = rows - 1 - r0; }
+      int v0 = floor(flow_at_point[1] + 0.5);
+      if (c0 + v0 < 0) { v0 = -c0; }
+      if (c0 + v0 >= cols) { v0 = cols - 1 - c0; }
 
       const int min_row_shift = -min(r0 + u0, max_flow);
       const int max_row_shift = min(rows - 1 - (r0 + u0), max_flow);
       const int min_col_shift = -min(c0 + v0, max_flow);
       const int max_col_shift = min(cols - 1 - (c0 + v0), max_flow);
 
-      double min_cost = DBL_MAX, best_u = u0, best_v = v0;
-
-      Mat w_full_window;
-      double w_full_window_sum;
-      Mat diff_storage;
-
-      if (r0 - averaging_radius >= 0 && 
-          r0 + averaging_radius < rows &&
-          c0 - averaging_radius >= 0 &&
-          c0 + averaging_radius < cols &&
-          mask.at<uchar>(r0, c0)) {
-          w_full_window = wd(averaging_radius, 
-                             averaging_radius, 
-                             averaging_radius, 
-                             averaging_radius, 
-                             sigma_dist).mul(
-                          wc(prev, r0, c0, 
-                             averaging_radius, 
-                             averaging_radius, 
-                             averaging_radius, 
-                             averaging_radius, 
-                             sigma_color));
+      float min_cost = DBL_MAX, best_u = u0, best_v = v0;
 
+      if (mask.at<uchar>(r0, c0)) {
+          wc(prev_extended, w_full_window, r0 + averaging_radius, c0 + averaging_radius,
+             averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_color);
+          multiply(w_full_window, wd_full_window, w_full_window);
           w_full_window_sum = sum(w_full_window)[0];
-          diff_storage = Mat::zeros(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_64F);
       }
 
       bool first_flow_iteration = true;
-      double sum_e, min_e;
+      float sum_e, min_e;
 
       for (int u = min_row_shift; u <= max_row_shift; ++u) {
         for (int v = min_col_shift; v <= max_col_shift; ++v) {
-          double e = dist(prev.at<Vec3b>(r0, c0), next.at<Vec3b>(r0 + u0 + u, c0 + v0 + v));
+          float e = dist(prev.at<Vec3b>(r0, c0), next.at<Vec3b>(r0 + u0 + u, c0 + v0 + v));
           if (first_flow_iteration) {
             sum_e = e;
             min_e = e;
@@ -269,10 +230,11 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
                                      r0 + u0 + u + window_bottom_shift + 1);
           const Range next_col_range(c0 + v0 + v - window_left_shift, 
                                      c0 + v0 + v + window_right_shift + 1); 
-          
+         
+                     
           Mat diff2;
           Mat w;
-          double w_sum;
+          float w_sum;
           if (window_top_shift == averaging_radius &&
               window_bottom_shift == averaging_radius &&
               window_left_shift == averaging_radius &&
@@ -280,21 +242,23 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
             w = w_full_window;
             w_sum = w_full_window_sum;
             diff2 = diff_storage;
-
             dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
           } else {
-            diff2 = Mat::zeros(window_bottom_shift + window_top_shift + 1,  
-                                   window_right_shift + window_left_shift + 1, CV_64F);
+            diff2 = diff_storage(Range(averaging_radius - window_top_shift, 
+                                       averaging_radius + 1 + window_bottom_shift),
+                                 Range(averaging_radius - window_left_shift,
+                                       averaging_radius + 1 + window_right_shift));
             
             dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
-
-            w = wd(window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_dist).mul( 
-                wc(prev, r0, c0, window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_color));
+            w = w_full_window(Range(averaging_radius - window_top_shift, 
+                                    averaging_radius + 1 + window_bottom_shift),
+                              Range(averaging_radius - window_left_shift,
+                                    averaging_radius + 1 + window_right_shift));
             w_sum = sum(w)[0];
           }
           multiply(diff2, w, diff2);
       
-          const double cost = sum(diff2)[0] / w_sum;
+          const float cost = sum(diff2)[0] / w_sum;
           if (cost < min_cost) {
             min_cost = cost;
             best_u = u + u0;
@@ -302,72 +266,37 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
           }
         }
       }
-      int square = (max_row_shift - min_row_shift + 1) *
-                   (max_col_shift - min_col_shift + 1);
-      confidence.at<double>(r0, c0) = (square == 0) ? 0
-                                                   : sum_e / square - min_e;
+      int windows_square = (max_row_shift - min_row_shift + 1) *
+                           (max_col_shift - min_col_shift + 1);
+      confidence.at<float>(r0, c0) = (windows_square == 0) ? 0
+                                                           : sum_e / windows_square - min_e;
+      CV_Assert(confidence.at<float>(r0, c0) >= 0); // TODO: remove it after testing
       if (mask.at<uchar>(r0, c0)) {
-        flow.u.at<double>(r0, c0) = best_u;
-        flow.v.at<double>(r0, c0) = best_v;
+        flow.at<Vec2f>(r0, c0) = Vec2f(best_u, best_v);
       }
     }
   }
 }
 
-static Flow upscaleOpticalFlow(int new_rows, 
+static Mat upscaleOpticalFlow(int new_rows, 
                                int new_cols,
                                const Mat& image,
                                const Mat& confidence,
-                               const Flow& flow,
+                               Mat& flow,
                                int averaging_radius,
-                               double sigma_dist,
-                               double sigma_color) {
-  const int rows = image.rows;
-  const int cols = image.cols;
-  Flow new_flow(new_rows, new_cols);
-  for (int r = 0; r < rows; ++r) {
-    for (int c = 0; c < cols; ++c) {
-      const int window_top_shift = min(r, averaging_radius);
-      const int window_bottom_shift = min(rows - 1 - r, averaging_radius);
-      const int window_left_shift = min(c, averaging_radius);
-      const int window_right_shift = min(cols - 1 - c, averaging_radius);
-      
-      const Range row_range(r - window_top_shift, r + window_bottom_shift + 1);
-      const Range col_range(c - window_left_shift, c + window_right_shift + 1);
-      
-      const Mat w = confidence(row_range, col_range).mul(
-        wd(window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_dist)).mul( 
-        wc(image, r, c, window_top_shift, window_bottom_shift, window_left_shift, window_right_shift, sigma_color));
-
-      const double w_sum = sum(w)[0];
-      double new_u, new_v;
-      if (fabs(w_sum) < 1e-9) {
-        new_u = flow.u.at<double>(r, c);
-        new_v = flow.v.at<double>(r, c);
-      } else {
-        new_u = sum(flow.u(row_range, col_range).mul(w))[0] / w_sum;
-        new_v = sum(flow.v(row_range, col_range).mul(w))[0] / w_sum;
-      }
-      
-      for (int dr = 0; dr <= 1; ++dr) {
-        int nr = 2*r + dr;
-        for (int dc = 0; dc <= 1; ++dc) {
-          int nc = 2*c + dc;
-          if (nr < new_rows && nc < new_cols) {
-            new_flow.u.at<double>(nr, nc) = 2 * new_u;
-            new_flow.v.at<double>(nr, nc) = 2 * new_v;
-          }
-        }
-      }
-    }
-  }
+                               float sigma_dist,
+                               float sigma_color) {
+  crossBilateralFilter(flow, image, confidence, flow, averaging_radius, sigma_color, sigma_dist, false);
+  Mat new_flow;
+  resize(flow, new_flow, Size(new_cols, new_rows), 0, 0, INTER_NEAREST);
+  new_flow *= 2;
   return new_flow;
 }
 
-static Mat calcIrregularityMat(const Flow& flow, int radius) {
-  const int rows = flow.u.rows;
-  const int cols = flow.v.cols;
-  Mat irregularity = Mat::zeros(rows, cols, CV_64F);
+static Mat calcIrregularityMat(const Mat& flow, int radius) {
+  const int rows = flow.rows;
+  const int cols = flow.cols;
+  Mat irregularity(rows, cols, CV_32F);
   for (int r = 0; r < rows; ++r) {
     const int start_row = max(0, r - radius);
     const int end_row = min(rows - 1, r + radius);
@@ -376,10 +305,9 @@ static Mat calcIrregularityMat(const Flow& flow, int radius) {
       const int end_col = min(cols - 1, c + radius);
       for (int dr = start_row; dr <= end_row; ++dr) {
         for (int dc = start_col; dc <= end_col; ++dc) {
-          const double diff = dist(flow.u.at<double>(r, c), flow.v.at<double>(r, c), 
-                                   flow.u.at<double>(dr, dc), flow.v.at<double>(dr, dc));
-          if (diff > irregularity.at<double>(r, c)) {
-            irregularity.at<double>(r, c) = diff;
+          const float diff = dist(flow.at<Vec2f>(r, c), flow.at<Vec2f>(dr, dc));
+          if (diff > irregularity.at<float>(r, c)) {
+            irregularity.at<float>(r, c) = diff;
           }
         }
       }
@@ -388,7 +316,7 @@ static Mat calcIrregularityMat(const Flow& flow, int radius) {
   return irregularity;
 }
 
-static void selectPointsToRecalcFlow(const Flow& flow, 
+static void selectPointsToRecalcFlow(const Mat& flow, 
                                      int irregularity_metric_radius,
                                      int speed_up_thr,
                                      int curr_rows,
@@ -396,11 +324,10 @@ static void selectPointsToRecalcFlow(const Flow& flow,
                                      const Mat& prev_speed_up,
                                      Mat& speed_up,
                                      Mat& mask) {
-  const int prev_rows = flow.u.rows;
-  const int prev_cols = flow.v.cols;
+  const int prev_rows = flow.rows;
+  const int prev_cols = flow.cols;
 
-  Mat is_flow_regular = calcIrregularityMat(flow, 
-                                                irregularity_metric_radius)
+  Mat is_flow_regular = calcIrregularityMat(flow, irregularity_metric_radius)
                               < speed_up_thr;
   Mat done = Mat::zeros(prev_rows, prev_cols, CV_8U);
   speed_up = Mat::zeros(curr_rows, curr_cols, CV_8U);
@@ -470,28 +397,28 @@ static void selectPointsToRecalcFlow(const Flow& flow,
   }
 }
 
-static inline double extrapolateValueInRect(int height, int width,
-                                            double v11, double v12,
-                                            double v21, double v22,
+static inline float extrapolateValueInRect(int height, int width,
+                                            float v11, float v12,
+                                            float v21, float v22,
                                             int r, int c) {
   if (r == 0 && c == 0) { return v11;}
   if (r == 0 && c == width) { return v12;}
   if (r == height && c == 0) { return v21;}
   if (r == height && c == width) { return v22;}
   
-  double qr = double(r) / height;
-  double pr = 1.0 - qr;
-  double qc = double(c) / width;
-  double pc = 1.0 - qc;
+  float qr = float(r) / height;
+  float pr = 1.0 - qr;
+  float qc = float(c) / width;
+  float pc = 1.0 - qc;
 
   return v11*pr*pc + v12*pr*qc + v21*qr*pc + v22*qc*qr; 
 }
                                               
-static void extrapolateFlow(Flow& flow,
+static void extrapolateFlow(Mat& flow,
                             const Mat& speed_up) {
-  const int rows = flow.u.rows;
-  const int cols = flow.u.cols;
-  Mat done = Mat::zeros(rows, cols, CV_8U);
+  const int rows = flow.rows;
+  const int cols = flow.cols;
+  Mat done(rows, cols, CV_8U);
   for (int r = 0; r < rows; ++r) {
     for (int c = 0; c < cols; ++c) {
       if (!done.at<uchar>(r, c) && speed_up.at<uchar>(r, c) > 1) {
@@ -506,21 +433,22 @@ static void extrapolateFlow(Flow& flow,
         for (int rr = top; rr <= bottom; ++rr) {
           for (int cc = left; cc <= right; ++cc) {
             done.at<uchar>(rr, cc) = 1;
-            flow.u.at<double>(rr, cc) = extrapolateValueInRect(
-                                          height, width, 
-                                          flow.u.at<double>(top, left),
-                                          flow.u.at<double>(top, right),
-                                          flow.u.at<double>(bottom, left),
-                                          flow.u.at<double>(bottom, right),
-                                          rr-top, cc-left); 
-
-            flow.v.at<double>(rr, cc) = extrapolateValueInRect(
-                                          height, width, 
-                                          flow.v.at<double>(top, left),
-                                          flow.v.at<double>(top, right),
-                                          flow.v.at<double>(bottom, left),
-                                          flow.v.at<double>(bottom, right),
-                                          rr-top, cc-left); 
+            Vec2f flow_at_point;
+            Vec2f top_left = flow.at<Vec2f>(top, left);
+            Vec2f top_right = flow.at<Vec2f>(top, right);
+            Vec2f bottom_left = flow.at<Vec2f>(bottom, left);
+            Vec2f bottom_right = flow.at<Vec2f>(bottom, right);
+
+            flow_at_point[0] = extrapolateValueInRect(height, width, 
+                                                      top_left[0], top_right[0],
+                                                      bottom_left[0], bottom_right[0],
+                                                      rr-top, cc-left); 
+
+            flow_at_point[1] = extrapolateValueInRect(height, width, 
+                                                      top_left[1], top_right[1],
+                                                      bottom_left[1], bottom_right[1],
+                                                      rr-top, cc-left); 
+            flow.at<Vec2f>(rr, cc) = flow_at_point;
           }
         }
       }
@@ -545,8 +473,9 @@ static void buildPyramidWithResizeMethod(Mat& src,
   }
 }
 
-static Flow calcOpticalFlowSF(Mat& from, 
+void calcOpticalFlowSF(Mat& from, 
                        Mat& to, 
+                       Mat& resulted_flow,
                        int layers,
                        int averaging_block_size, 
                        int max_flow,
@@ -565,8 +494,6 @@ static Flow calcOpticalFlowSF(Mat& from,
 
   buildPyramidWithResizeMethod(from, pyr_from_images, layers - 1, INTER_CUBIC);
   buildPyramidWithResizeMethod(to, pyr_to_images, layers - 1, INTER_CUBIC);
-//  buildPyramid(from, pyr_from_images, layers - 1, BORDER_WRAP);
-//  buildPyramid(to, pyr_to_images, layers - 1, BORDER_WRAP);
 
   if ((int)pyr_from_images.size() != layers) {
       exit(1);
@@ -582,8 +509,8 @@ static Flow calcOpticalFlowSF(Mat& from,
   Mat mask = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
   Mat mask_inv = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
 
-  Flow flow(first_from_image.rows, first_from_image.cols);
-  Flow flow_inv(first_to_image.rows, first_to_image.cols);
+  Mat flow(first_from_image.rows, first_from_image.cols, CV_32FC2);
+  Mat flow_inv(first_to_image.rows, first_to_image.cols, CV_32FC2);
 
   Mat confidence;
   Mat confidence_inv;
@@ -641,8 +568,6 @@ static Flow calcOpticalFlowSF(Mat& from,
                              new_speed_up,
                              mask);
 
-    int points_to_recalculate = sum(mask)[0] / MASK_TRUE_VALUE;
-
     selectPointsToRecalcFlow(flow_inv,
                              averaging_block_size,
                              speed_up_thr,
@@ -652,8 +577,6 @@ static Flow calcOpticalFlowSF(Mat& from,
                              new_speed_up_inv,
                              mask_inv);
 
-    points_to_recalculate = sum(mask_inv)[0] / MASK_TRUE_VALUE;
-
     speed_up = new_speed_up;
     speed_up_inv = new_speed_up_inv;
 
@@ -702,55 +625,14 @@ static Flow calcOpticalFlowSF(Mat& from,
     removeOcclusions(flow_inv, flow, occ_thr, confidence_inv);
   }
 
-  WeightedCrossBilateralFilter filter_postprocess(pyr_from_images[0], 
-                                                  postprocess_window,
-                                                  sigma_dist_fix,
-                                                  sigma_color_fix);
+  crossBilateralFilter(flow, pyr_from_images[0], confidence, flow, 
+                       postprocess_window, sigma_color_fix, sigma_dist_fix);
 
-  flow.u = filter_postprocess.apply(flow.u, confidence);
-  flow.v = filter_postprocess.apply(flow.v, confidence);
-
-  Mat blured_u, blured_v;
-  GaussianBlur(flow.u, blured_u, Size(3, 3), 5);
-  GaussianBlur(flow.v, blured_v, Size(3, 3), 5);
-
-  return Flow(blured_v, blured_u);
-}
-
-void calcOpticalFlowSF(Mat& from, 
-                       Mat& to, 
-                       Mat& flowX, 
-                       Mat& flowY,
-                       int layers,
-                       int averaging_block_size, 
-                       int max_flow,
-                       double sigma_dist,
-                       double sigma_color,
-                       int postprocess_window, 
-                       double sigma_dist_fix, 
-                       double sigma_color_fix,
-                       double occ_thr,
-                       int upscale_averaging_radius,
-                       double upscale_sigma_dist,
-                       double upscale_sigma_color,
-                       double speed_up_thr) {
+  GaussianBlur(flow, flow, Size(3, 3), 5);
 
-  Flow flow = calcOpticalFlowSF(from, to, 
-                                layers,
-                                averaging_block_size,
-                                max_flow,
-                                sigma_dist,
-                                sigma_color,
-                                postprocess_window,
-                                sigma_dist_fix,
-                                sigma_color_fix,
-                                occ_thr,
-                                upscale_averaging_radius,
-                                upscale_sigma_dist,
-                                upscale_sigma_color,
-                                speed_up_thr);
-  flowX = flow.u;
-  flowY = flow.v;
+  resulted_flow = Mat(flow.size(), CV_32FC2);
+  int from_to[] = {0,1 , 1,0};
+  mixChannels(&flow, 1, &resulted_flow, 1, from_to, 2);
 }
 
 }
diff --git a/modules/video/src/simpleflow.hpp b/modules/video/src/simpleflow.hpp
index 55052fd05b..c4aa02355a 100644
--- a/modules/video/src/simpleflow.hpp
+++ b/modules/video/src/simpleflow.hpp
@@ -52,32 +52,23 @@ using namespace std;
 
 namespace cv {
 
-struct Flow {
-  Mat u, v;
-
-  Flow() {;}
-
-  Flow(Mat& _u, Mat& _v)
-    : u(_u), v(_v) {;}
-  
-  Flow(int rows, int cols) {
-    u = Mat::zeros(rows, cols, CV_64F);
-    v = Mat::zeros(rows, cols, CV_64F);
-  }
-};
-
-inline static double dist(const Vec3b& p1, const Vec3b& p2) {
+inline static float dist(const Vec3b& p1, const Vec3b& p2) {
   return (p1[0] - p2[0]) * (p1[0] - p2[0]) +
          (p1[1] - p2[1]) * (p1[1] - p2[1]) +
          (p1[2] - p2[2]) * (p1[2] - p2[2]);
 }
 
-inline static double dist(const Point2f& p1, const Point2f& p2) {
+inline static float dist(const Vec2f& p1, const Vec2f& p2) {
+  return (p1[0] - p2[0]) * (p1[0] - p2[0]) +
+         (p1[1] - p2[1]) * (p1[1] - p2[1]);
+}
+
+inline static float dist(const Point2f& p1, const Point2f& p2) {
   return (p1.x - p2.x) * (p1.x - p2.x) +
          (p1.y - p2.y) * (p1.y - p2.y);
 }
 
-inline static double dist(double x1, double y1, double x2, double y2) {
+inline static float dist(float x1, float y1, float x2, float y2) {
   return (x1 - x2) * (x1 - x2) +
          (y1 - y2) * (y1 - y2);
 }
@@ -92,34 +83,6 @@ inline static T min(T t1, T t2, T t3) {
   return (t1 <= t2 && t1 <= t3) ? t1 : min(t2, t3);
 }
 
-template<class T>
-vector<vector<T> > build(int n, int m) {
-  vector<vector<T> > res(n);
-  for (int i = 0; i < n; ++i) {
-    res[i].resize(m, 0);
-  }
-  return res;
-}
-
-class WeightedCrossBilateralFilter {
-public:
-  WeightedCrossBilateralFilter(const Mat& _image,
-                       int _windowSize,
-                       double _sigmaDist,
-                       double _sigmaColor);
-
-  Mat apply(Mat& matrix, Mat& weights);
-
-private:
-  double convolution(Mat& matrix, int row, int col, Mat& weights);
-
-  Mat image;
-  int windowSize;
-  double sigmaDist, sigmaColor;
-
-  vector<double> expDist;
-  vector<vector<vector<vector<double> > > > wc;
-};
 }
 
 #endif
diff --git a/modules/video/test/test_simpleflow.cpp b/modules/video/test/test_simpleflow.cpp
index 186ba8f56d..050d595d1c 100644
--- a/modules/video/test/test_simpleflow.cpp
+++ b/modules/video/test/test_simpleflow.cpp
@@ -58,66 +58,67 @@ protected:
 
 CV_SimpleFlowTest::CV_SimpleFlowTest() {}
 
-static void readOpticalFlowFromFile(FILE* file, cv::Mat& flowX, cv::Mat& flowY) {
+static bool readOpticalFlowFromFile(FILE* file, cv::Mat& flow) {
   char header[5];
   if (fread(header, 1, 4, file) < 4 && (string)header != "PIEH") {
-    return;
+    return false;
   }
 
   int cols, rows;
   if (fread(&cols, sizeof(int), 1, file) != 1||
       fread(&rows, sizeof(int), 1, file) != 1) {
-    return;
+    return false;
   }
 
-  flowX = cv::Mat::zeros(rows, cols, CV_64F);
-  flowY = cv::Mat::zeros(rows, cols, CV_64F);
+  flow = cv::Mat::zeros(rows, cols, CV_32FC2);
 
   for (int i = 0; i < rows; ++i) {
     for (int j = 0; j < cols; ++j) {
-      float uPoint, vPoint;
-      if (fread(&uPoint, sizeof(float), 1, file) != 1 ||
-          fread(&vPoint, sizeof(float), 1, file) != 1) {
-        flowX.release();
-        flowY.release();
-        return;
+      cv::Vec2f flow_at_point;
+      if (fread(&(flow_at_point[0]), sizeof(float), 1, file) != 1 ||
+          fread(&(flow_at_point[1]), sizeof(float), 1, file) != 1) {
+        return false;
       }
-  
-      flowX.at<double>(i, j) = uPoint;
-      flowY.at<double>(i, j) = vPoint;
+      flow.at<cv::Vec2f>(i, j) = flow_at_point;
     }
   }
+
+  return true;
 }
 
-static bool isFlowCorrect(double u) {
+static bool isFlowCorrect(float u) {
   return !isnan(u) && (fabs(u) < 1e9);
 }
 
-static double calc_rmse(cv::Mat flow1X, cv::Mat flow1Y, cv::Mat flow2X, cv::Mat flow2Y) {
-  long double sum;
+static float calc_rmse(cv::Mat flow1, cv::Mat flow2) {
+  float sum;
   int counter = 0;
-  const int rows = flow1X.rows;
-  const int cols = flow1X.cols;
+  const int rows = flow1.rows;
+  const int cols = flow1.cols;
 
   for (int y = 0; y < rows; ++y) {
     for (int x = 0; x < cols; ++x) {
-      double u1 = flow1X.at<double>(y, x);
-      double v1 = flow1Y.at<double>(y, x);
-      double u2 = flow2X.at<double>(y, x);
-      double v2 = flow2Y.at<double>(y, x);
+      cv::Vec2f flow1_at_point = flow1.at<cv::Vec2f>(y, x);
+      cv::Vec2f flow2_at_point = flow2.at<cv::Vec2f>(y, x);
+
+      float u1 = flow1_at_point[0];
+      float v1 = flow1_at_point[1];
+      float u2 = flow2_at_point[0];
+      float v2 = flow2_at_point[1];
+
       if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) {
         sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2);
         counter++;
       }
     }
   }
-  return sqrt((double)sum / (1e-9 + counter));
+  return sqrt(sum / (1e-9 + counter));
 }
 
 void CV_SimpleFlowTest::run(int) {
     int code = cvtest::TS::OK;
     
-    const double MAX_RMSE = 0.6;
+    const float MAX_RMSE = 0.6;
     const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png";
     const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png";
     const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo";
@@ -151,7 +152,7 @@ void CV_SimpleFlowTest::run(int) {
       return;
     }
 
-    cv::Mat flowX_gt, flowY_gt;
+    cv::Mat flow_gt;
 
     FILE* gt_flow_file = fopen(gt_flow_path.c_str(), "rb");
     if (gt_flow_file == NULL) {
@@ -160,8 +161,8 @@ void CV_SimpleFlowTest::run(int) {
       ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
       return;
     }
-    readOpticalFlowFromFile(gt_flow_file, flowX_gt, flowY_gt);
-    if (flowX_gt.empty() || flowY_gt.empty()) {
+
+    if (!readOpticalFlowFromFile(gt_flow_file, flow_gt)) {
       ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s",
                  gt_flow_path.c_str());
       ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
@@ -169,12 +170,12 @@ void CV_SimpleFlowTest::run(int) {
     }
     fclose(gt_flow_file);
 
-    cv::Mat flowX, flowY;
+    cv::Mat flow;
     cv::calcOpticalFlowSF(frame1, frame2, 
-                          flowX, flowY,
+                          flow,
                           3, 4, 2, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
 
-    double rmse = calc_rmse(flowX_gt, flowY_gt, flowX, flowY);
+    float rmse = calc_rmse(flow_gt, flow);
     
     ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SimpleFlow algorithm : %lf\n",
                rmse);
diff --git a/samples/cpp/simpleflow_demo.cpp b/samples/cpp/simpleflow_demo.cpp
index 6a195fe894..332df7821e 100644
--- a/samples/cpp/simpleflow_demo.cpp
+++ b/samples/cpp/simpleflow_demo.cpp
@@ -8,89 +8,212 @@
 using namespace cv;
 using namespace std;
 
+#define APP_NAME "simpleflow_demo : "
+
 static void help()
 {
-    // print a welcome message, and the OpenCV version
-    printf("This is a demo of SimpleFlow optical flow algorithm,\n"
-           "Using OpenCV version %s\n\n", CV_VERSION);
-
-    printf("Usage: simpleflow_demo frame1 frame2 output_flow"
-           "\nApplication will write estimated flow "
-           "\nbetween 'frame1' and 'frame2' in binary format"
-           "\ninto file 'output_flow'"
-           "\nThen one can use code from http://vision.middlebury.edu/flow/data/"
-           "\nto convert flow in binary file to image\n");
+  // print a welcome message, and the OpenCV version
+  printf("This is a demo of SimpleFlow optical flow algorithm,\n"
+         "Using OpenCV version %s\n\n", CV_VERSION);
+
+  printf("Usage: simpleflow_demo frame1 frame2 output_flow"
+         "\nApplication will write estimated flow "
+         "\nbetween 'frame1' and 'frame2' in binary format"
+         "\ninto file 'output_flow'"
+         "\nThen one can use code from http://vision.middlebury.edu/flow/data/"
+         "\nto convert flow in binary file to image\n");
 }
 
 // binary file format for flow data specified here:
 // http://vision.middlebury.edu/flow/data/
-static void writeOpticalFlowToFile(const Mat& u, const Mat& v, FILE* file) {
-  int cols = u.cols;
-  int rows = u.rows;
+static void writeOpticalFlowToFile(const Mat& flow, FILE* file) {
+  int cols = flow.cols;
+  int rows = flow.rows;
   
   fprintf(file, "PIEH");
-   
+ 
   if (fwrite(&cols, sizeof(int), 1, file) != 1 ||
       fwrite(&rows, sizeof(int), 1, file) != 1) {
-    fprintf(stderr, "writeOpticalFlowToFile : problem writing header\n");
+    printf(APP_NAME "writeOpticalFlowToFile : problem writing header\n");
     exit(1);
   }
 
-  for (int i= 0; i < u.rows; ++i) {
-    for (int j = 0; j < u.cols; ++j) {
-      float uPoint = u.at<double>(i, j);
-      float vPoint = v.at<double>(i, j);
+  for (int i= 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      Vec2f flow_at_point = flow.at<Vec2f>(i, j);
 
-      if (fwrite(&uPoint, sizeof(float), 1, file) != 1 ||
-          fwrite(&vPoint, sizeof(float), 1, file) != 1) {
-        fprintf(stderr, "writeOpticalFlowToFile : problem writing data\n");
+      if (fwrite(&(flow_at_point[0]), sizeof(float), 1, file) != 1 ||
+          fwrite(&(flow_at_point[1]), sizeof(float), 1, file) != 1) {
+        printf(APP_NAME "writeOpticalFlowToFile : problem writing data\n");
         exit(1);
       }
     }
   }
 }
-int main(int argc, char** argv) {
-    help();
 
-    if (argc < 4) {
-      fprintf(stderr, "Wrong number of command line arguments : %d (expected %d)\n", argc, 4);
-      exit(1);
-    }
-    
-    Mat frame1 = imread(argv[1]);
-    Mat frame2 = imread(argv[2]);
+static void run(int argc, char** argv) {
+  if (argc < 3) {
+    printf(APP_NAME "Wrong number of command line arguments for mode `run`: %d (expected %d)\n", 
+           argc, 3);
+    exit(1);
+  }
 
-    if (frame1.empty() || frame2.empty()) {
-      fprintf(stderr, "simpleflow_demo : Images cannot be read\n");
-      exit(1);
-    }
+  Mat frame1 = imread(argv[0]);
+  Mat frame2 = imread(argv[1]);
 
-    if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
-      fprintf(stderr, "simpleflow_demo : Images should be of equal sizes\n");
-      exit(1);
-    }
+  if (frame1.empty()) {
+    printf(APP_NAME "Image #1 : %s cannot be read\n", argv[0]);
+    exit(1);
+  }
 
-    if (frame1.type() != 16 || frame2.type() != 16) {
-      fprintf(stderr, "simpleflow_demo : Images should be of equal type CV_8UC3\n");
-      exit(1);
-    }
+  if (frame2.empty()) {
+    printf(APP_NAME "Image #2 : %s cannot be read\n", argv[1]);
+    exit(1);
+  }
+
+  if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
+    printf(APP_NAME "Images should be of equal sizes\n");
+    exit(1);
+  }
 
-    printf("simpleflow_demo : Read two images of size [rows = %d, cols = %d]\n", 
-           frame1.rows, frame1.cols);
+  if (frame1.type() != 16 || frame2.type() != 16) {
+    printf(APP_NAME "Images should be of equal type CV_8UC3\n");
+    exit(1);
+  }
+
+  printf(APP_NAME "Read two images of size [rows = %d, cols = %d]\n", 
+         frame1.rows, frame1.cols);
 
-    Mat flowX, flowY;
+  Mat flow;
 
-    calcOpticalFlowSF(frame1, frame2, 
-                      flowX, flowY,
-                      3, 2, 4, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+  float start = getTickCount();
+  calcOpticalFlowSF(frame1, frame2, 
+                    flow,
+                    3, 2, 4, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+  printf(APP_NAME "calcOpticalFlowSF : %lf sec\n", (getTickCount() - start) / getTickFrequency());
 
-  FILE* file = fopen(argv[3], "wb");
+  FILE* file = fopen(argv[2], "wb");
   if (file == NULL) {
-    fprintf(stderr, "simpleflow_demo : Unable to open file '%s' for writing\n", argv[3]); 
+    printf(APP_NAME "Unable to open file '%s' for writing\n", argv[2]); 
     exit(1);
   }
-  printf("simpleflow_demo : Writing to file\n");
-  writeOpticalFlowToFile(flowX, flowY, file);
+  printf(APP_NAME "Writing to file\n");
+  writeOpticalFlowToFile(flow, file);
   fclose(file);
+}
+
+static bool readOpticalFlowFromFile(FILE* file, Mat& flow) {
+  char header[5];
+  if (fread(header, 1, 4, file) < 4 && (string)header != "PIEH") {
+    return false;
+  }
+
+  int cols, rows;
+  if (fread(&cols, sizeof(int), 1, file) != 1||
+      fread(&rows, sizeof(int), 1, file) != 1) {
+    return false;
+  }
+
+  flow = Mat::zeros(rows, cols, CV_32FC2);
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      Vec2f flow_at_point;
+      if (fread(&(flow_at_point[0]), sizeof(float), 1, file) != 1 ||
+          fread(&(flow_at_point[1]), sizeof(float), 1, file) != 1) {
+        return false;
+      }
+      flow.at<Vec2f>(i, j) = flow_at_point;
+    }
+  }
+
+  return true;
+}
+
+static bool isFlowCorrect(float u) {
+  return !isnan(u) && (fabs(u) < 1e9);
+}
+
+static float calc_rmse(Mat flow1, Mat flow2) {
+  float sum;
+  int counter = 0;
+  const int rows = flow1.rows;
+  const int cols = flow1.cols;
+
+  for (int y = 0; y < rows; ++y) {
+    for (int x = 0; x < cols; ++x) {
+      Vec2f flow1_at_point = flow1.at<Vec2f>(y, x);
+      Vec2f flow2_at_point = flow2.at<Vec2f>(y, x);
+
+      float u1 = flow1_at_point[0];
+      float v1 = flow1_at_point[1];
+      float u2 = flow2_at_point[0];
+      float v2 = flow2_at_point[1];
+
+      if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) {
+        sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2);
+        counter++;
+      }
+    }
+  }
+  return sqrt(sum / (1e-9 + counter));
+}
+
+static void eval(int argc, char** argv) {
+  if (argc < 2) {
+    printf(APP_NAME "Wrong number of command line arguments for mode `eval` : %d (expected %d)\n", 
+           argc, 2);
+    exit(1);
+  }
+  
+  Mat flow1, flow2;
+
+  FILE* flow_file_1 = fopen(argv[0], "rb");
+  if (flow_file_1 == NULL) {
+    printf(APP_NAME "Cannot open file with first flow : %s\n", argv[0]);
+    exit(1);
+  }
+  if (!readOpticalFlowFromFile(flow_file_1, flow1)) {
+    printf(APP_NAME "Cannot read flow data from file %s\n", argv[0]);
+    exit(1);
+  }
+  fclose(flow_file_1);
+
+  FILE* flow_file_2 = fopen(argv[1], "rb");
+  if (flow_file_2 == NULL) {
+    printf(APP_NAME "Cannot open file with first flow : %s\n", argv[1]);
+    exit(1);
+  }
+  if (!readOpticalFlowFromFile(flow_file_2, flow2)) {
+    printf(APP_NAME "Cannot read flow data from file %s\n", argv[1]);
+    exit(1);
+  }
+  fclose(flow_file_2);
+  
+  float rmse = calc_rmse(flow1, flow2);
+  printf("%lf\n", rmse);
+}
+
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    printf(APP_NAME "Mode is not specified\n");
+    help();
+    exit(1);
+  }
+  string mode = (string)argv[1];
+  int new_argc = argc - 2;
+  char** new_argv = &argv[2];
+
+  if ("run" == mode) {
+    run(new_argc, new_argv);
+  } else if ("eval" == mode) {
+    eval(new_argc, new_argv);
+  } else if ("help" == mode) 
+    help();
+  else {
+    printf(APP_NAME "Unknown mode : %s\n", argv[1]);   
+    help();
+  }
+  
   return 0;
 }

From 7ad4c2545225aa735b9d1b5cd8e043be07d91643 Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Fri, 31 Aug 2012 13:27:14 +0400
Subject: [PATCH 3/7] Small fixes for SimpleFlow algorithm

+ Fixed warnings
+ Add new function calcOpticalFlow with smaller number of arguments
+ Add asserts to algorithm and remove 'exit(1)'
---
 .../motion_analysis_and_object_tracking.rst   |  2 ++
 .../video/include/opencv2/video/tracking.hpp  |  7 ++++
 modules/video/src/simpleflow.cpp              | 35 +++++++++++--------
 modules/video/test/test_simpleflow.cpp        |  8 ++---
 samples/cpp/simpleflow_demo.cpp               |  2 +-
 5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/modules/video/doc/motion_analysis_and_object_tracking.rst b/modules/video/doc/motion_analysis_and_object_tracking.rst
index ebb9290cc4..6db525a743 100644
--- a/modules/video/doc/motion_analysis_and_object_tracking.rst
+++ b/modules/video/doc/motion_analysis_and_object_tracking.rst
@@ -601,6 +601,8 @@ calcOpticalFlowSF
 -----------
 Calculate an optical flow using "SimpleFlow" algorithm.
 
+.. ocv:function:: void calcOpticalFlowSF( Mat& prev, Mat& next, Mat& flowX, Mat& flowY, int layers, int averaging_block_size, int max_flow)
+
 .. ocv:function:: void calcOpticalFlowSF( Mat& prev, Mat& next, Mat& flowX, Mat& flowY, int layers, int averaging_block_size, int max_flow, double sigma_dist, double sigma_color, int postprocess_window, double sigma_dist_fix, double sigma_color_fix, double occ_thr, int upscale_averaging_radiud, double upscale_sigma_dist, double upscale_sigma_color, double speed_up_thr)
 
     :param prev: First 8-bit 3-channel image.
diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp
index 6800c63373..2be030d76d 100644
--- a/modules/video/include/opencv2/video/tracking.hpp
+++ b/modules/video/include/opencv2/video/tracking.hpp
@@ -328,6 +328,13 @@ CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst,
                                          bool fullAffine);
   
 //! computes dense optical flow using Simple Flow algorithm
+CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
+                                    Mat& to,
+                                    Mat& flow,
+                                    int layers,
+                                    int averaging_block_size, 
+                                    int max_flow);
+
 CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
                                     Mat& to,
                                     Mat& flow,
diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index 0cd320d6ee..59fafdcf3d 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -72,26 +72,27 @@ static void removeOcclusions(const Mat& flow,
 }
 
 static void wd(Mat& d, int top_shift, int bottom_shift, int left_shift, int right_shift, float sigma) {
-  const float factor = 1.0 / (2.0 * sigma * sigma);
   for (int dr = -top_shift, r = 0; dr <= bottom_shift; ++dr, ++r) {
     for (int dc = -left_shift, c = 0; dc <= right_shift; ++dc, ++c) {
-      d.at<float>(r, c) = -(dr*dr + dc*dc) * factor;
+      d.at<float>(r, c) = -(dr*dr + dc*dc);
     }
   }
+  d *= 1.0 / (2.0 * sigma * sigma);
   exp(d, d);
 }
 
 static void wc(const Mat& image, Mat& d, int r0, int c0, 
                int top_shift, int bottom_shift, int left_shift, int right_shift, float sigma) {
-  const float factor = 1.0 / (2.0 * sigma * sigma);
   const Vec3b centeral_point = image.at<Vec3b>(r0, c0);
+  int left_border = c0-left_shift, right_border = c0+right_shift;
   for (int dr = r0-top_shift, r = 0; dr <= r0+bottom_shift; ++dr, ++r) {
     const Vec3b *row = image.ptr<Vec3b>(dr); 
     float *d_row = d.ptr<float>(r);
-    for (int dc = c0-left_shift, c = 0; dc <= c0+right_shift; ++dc, ++c) {
-      d_row[c] = -dist(centeral_point, row[dc]) * factor;
+    for (int dc = left_border, c = 0; dc <= right_border; ++dc, ++c) {
+      d_row[c] = -dist(centeral_point, row[dc]);
     }
   }
+  d *= 1.0 / (2.0 * sigma * sigma);
   exp(d, d);
 }
 
@@ -163,7 +164,7 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
   Mat diff_storage(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
   Mat w_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
   Mat wd_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
-  float w_full_window_sum;
+  float w_full_window_sum = 1e-9;
 
   Mat prev_extended;
   copyMakeBorder(prev, prev_extended, 
@@ -197,7 +198,7 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
       }
 
       bool first_flow_iteration = true;
-      float sum_e, min_e;
+      float sum_e = 0, min_e = 0;
 
       for (int u = min_row_shift; u <= max_row_shift; ++u) {
         for (int v = min_col_shift; v <= max_col_shift; ++v) {
@@ -286,7 +287,7 @@ static Mat upscaleOpticalFlow(int new_rows,
                                int averaging_radius,
                                float sigma_dist,
                                float sigma_color) {
-  crossBilateralFilter(flow, image, confidence, flow, averaging_radius, sigma_color, sigma_dist, false);
+  crossBilateralFilter(flow, image, confidence, flow, averaging_radius, sigma_color, sigma_dist, true);
   Mat new_flow;
   resize(flow, new_flow, Size(new_cols, new_rows), 0, 0, INTER_NEAREST);
   new_flow *= 2;
@@ -495,13 +496,7 @@ void calcOpticalFlowSF(Mat& from,
   buildPyramidWithResizeMethod(from, pyr_from_images, layers - 1, INTER_CUBIC);
   buildPyramidWithResizeMethod(to, pyr_to_images, layers - 1, INTER_CUBIC);
 
-  if ((int)pyr_from_images.size() != layers) {
-      exit(1);
-  }
-
-  if ((int)pyr_to_images.size() != layers) {
-      exit(1);
-  }
+  CV_Assert((int)pyr_from_images.size() == layers && (int)pyr_to_images.size() == layers);
 
   Mat first_from_image = pyr_from_images[layers - 1];
   Mat first_to_image = pyr_to_images[layers - 1];
@@ -635,5 +630,15 @@ void calcOpticalFlowSF(Mat& from,
   mixChannels(&flow, 1, &resulted_flow, 1, from_to, 2);
 }
 
+CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
+                                    Mat& to,
+                                    Mat& flow,
+                                    int layers,
+                                    int averaging_block_size, 
+                                    int max_flow) {
+  calcOpticalFlowSF(from, to, flow, layers, averaging_block_size, max_flow,
+                    4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+}
+
 }
 
diff --git a/modules/video/test/test_simpleflow.cpp b/modules/video/test/test_simpleflow.cpp
index 050d595d1c..fe96e7004f 100644
--- a/modules/video/test/test_simpleflow.cpp
+++ b/modules/video/test/test_simpleflow.cpp
@@ -91,7 +91,7 @@ static bool isFlowCorrect(float u) {
 }
 
 static float calc_rmse(cv::Mat flow1, cv::Mat flow2) {
-  float sum;
+  float sum = 0;
   int counter = 0;
   const int rows = flow1.rows;
   const int cols = flow1.cols;
@@ -116,8 +116,6 @@ static float calc_rmse(cv::Mat flow1, cv::Mat flow2) {
 }
 
 void CV_SimpleFlowTest::run(int) {
-    int code = cvtest::TS::OK;
-    
     const float MAX_RMSE = 0.6;
     const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png";
     const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png";
@@ -171,9 +169,7 @@ void CV_SimpleFlowTest::run(int) {
     fclose(gt_flow_file);
 
     cv::Mat flow;
-    cv::calcOpticalFlowSF(frame1, frame2, 
-                          flow,
-                          3, 4, 2, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10);
+    cv::calcOpticalFlowSF(frame1, frame2, flow, 3, 2, 4);
 
     float rmse = calc_rmse(flow_gt, flow);
     
diff --git a/samples/cpp/simpleflow_demo.cpp b/samples/cpp/simpleflow_demo.cpp
index 332df7821e..2727fe640a 100644
--- a/samples/cpp/simpleflow_demo.cpp
+++ b/samples/cpp/simpleflow_demo.cpp
@@ -135,7 +135,7 @@ static bool isFlowCorrect(float u) {
 }
 
 static float calc_rmse(Mat flow1, Mat flow2) {
-  float sum;
+  float sum = 0;
   int counter = 0;
   const int rows = flow1.rows;
   const int cols = flow1.cols;

From 0c10ed26e3cf3d336689a538c060359e63a30458 Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Mon, 3 Sep 2012 20:35:20 +0400
Subject: [PATCH 4/7] Update to improve performance of SimpleFlow algorithm

+ Improve performance of calcOpticalFlowSingleScale method
+ Small refactoring

Current results:

IMAGE NAMES          RMSE
Beanbags
Dimetrodon           0.329428
DogDance
Grove2               0.550852
Grove3               1.464699
Hydrangea            0.523277
MiniCooper
RubberWhale          0.367246
Urban2               2.717003
Urban3               4.185070
Venus                0.775422
Walking

Time (for Urban3):
17.490248 sec
---
 modules/video/src/simpleflow.cpp | 313 ++++++++++++++++---------------
 modules/video/src/simpleflow.hpp |  16 ++
 2 files changed, 176 insertions(+), 153 deletions(-)

diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index 59fafdcf3d..9571fe436f 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -60,6 +60,9 @@ static void removeOcclusions(const Mat& flow,
                              Mat& confidence) {
   const int rows = flow.rows;
   const int cols = flow.cols;
+  if (!confidence.data) {
+    confidence = Mat::zeros(rows, cols, CV_32F);
+  }
   for (int r = 0; r < rows; ++r) {
     for (int c = 0; c < cols; ++c) {
       if (dist(flow.at<Vec2f>(r, c), -flow_inv.at<Vec2f>(r, c)) > occ_thr) {
@@ -96,20 +99,12 @@ static void wc(const Mat& image, Mat& d, int r0, int c0,
   exp(d, d);
 }
 
-static void dist(const Mat& m1, const Mat& m2, Mat& result) {
-  const int rows = m1.rows;
-  const int cols = m1.cols;
-  for (int r = 0; r < rows; ++r) {
-    const Vec3b *m1_row = m1.ptr<Vec3b>(r);
-    const Vec3b *m2_row = m2.ptr<Vec3b>(r);
-    float* row = result.ptr<float>(r);
-    for (int c = 0; c < cols; ++c) {
-      row[c] = dist(m1_row[c], m2_row[c]);
-    }
-  }
-}
-
-static void crossBilateralFilter(const Mat& image, const Mat& edge_image, const Mat confidence, Mat& dst, int d, float sigma_color, float sigma_space, bool flag=false) {
+static void crossBilateralFilter(const Mat& image, 
+                                 const Mat& edge_image, 
+                                 const Mat confidence, 
+                                 Mat& dst, int d, 
+                                 float sigma_color, float sigma_space, 
+                                 bool flag=false) {
   const int rows = image.rows;
   const int cols = image.cols;
   Mat image_extended, edge_image_extended, confidence_extended;
@@ -121,7 +116,6 @@ static void crossBilateralFilter(const Mat& image, const Mat& edge_image, const
   Mat weights(2*d+1, 2*d+1, CV_32F);
   Mat weighted_sum(2*d+1, 2*d+1, CV_32F);
   
-
   vector<Mat> image_extended_channels;
   split(image_extended, image_extended_channels);
 
@@ -148,31 +142,15 @@ static void crossBilateralFilter(const Mat& image, const Mat& edge_image, const
   }
 }
 
-static void calcOpticalFlowSingleScaleSF(const Mat& prev, 
-                                         const Mat& next,
-                                         const Mat& mask,
-                                         Mat& flow,
-                                         Mat& confidence,
-                                         int averaging_radius, 
-                                         int max_flow,
-                                         float sigma_dist,
-                                         float sigma_color) {
+static void calcConfidence(const Mat& prev, 
+                           const Mat& next,
+                           const Mat& flow,
+                           Mat& confidence,
+                           int max_flow) {
   const int rows = prev.rows;
   const int cols = prev.cols;
   confidence = Mat::zeros(rows, cols, CV_32F);
   
-  Mat diff_storage(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
-  Mat w_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
-  Mat wd_full_window(averaging_radius*2 + 1, averaging_radius*2 + 1, CV_32F);
-  float w_full_window_sum = 1e-9;
-
-  Mat prev_extended;
-  copyMakeBorder(prev, prev_extended, 
-                 averaging_radius, averaging_radius, averaging_radius, averaging_radius,
-                 BORDER_DEFAULT);
-
-  wd(wd_full_window, averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_dist);
-
   for (int r0 = 0; r0 < rows; ++r0) {
     for (int c0 = 0; c0 < cols; ++c0) {
       Vec2f flow_at_point = flow.at<Vec2f>(r0, c0);
@@ -183,25 +161,16 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
       if (c0 + v0 < 0) { v0 = -c0; }
       if (c0 + v0 >= cols) { v0 = cols - 1 - c0; }
 
-      const int min_row_shift = -min(r0 + u0, max_flow);
-      const int max_row_shift = min(rows - 1 - (r0 + u0), max_flow);
-      const int min_col_shift = -min(c0 + v0, max_flow);
-      const int max_col_shift = min(cols - 1 - (c0 + v0), max_flow);
-
-      float min_cost = DBL_MAX, best_u = u0, best_v = v0;
-
-      if (mask.at<uchar>(r0, c0)) {
-          wc(prev_extended, w_full_window, r0 + averaging_radius, c0 + averaging_radius,
-             averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_color);
-          multiply(w_full_window, wd_full_window, w_full_window);
-          w_full_window_sum = sum(w_full_window)[0];
-      }
+      const int top_row_shift = -min(r0 + u0, max_flow);
+      const int bottom_row_shift = min(rows - 1 - (r0 + u0), max_flow);
+      const int left_col_shift = -min(c0 + v0, max_flow);
+      const int right_col_shift = min(cols - 1 - (c0 + v0), max_flow);
 
       bool first_flow_iteration = true;
       float sum_e = 0, min_e = 0;
 
-      for (int u = min_row_shift; u <= max_row_shift; ++u) {
-        for (int v = min_col_shift; v <= max_col_shift; ++v) {
+      for (int u = top_row_shift; u <= bottom_row_shift; ++u) {
+        for (int v = left_col_shift; v <= right_col_shift; ++v) {
           float e = dist(prev.at<Vec3b>(r0, c0), next.at<Vec3b>(r0 + u0 + u, c0 + v0 + v));
           if (first_flow_iteration) {
             sum_e = e;
@@ -211,55 +180,83 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
             sum_e += e;
             min_e = std::min(min_e, e);
           }
-          if (!mask.at<uchar>(r0, c0)) {
-            continue;
-          }
+        }
+      }
+      int windows_square = (bottom_row_shift - top_row_shift + 1) *
+                           (right_col_shift - left_col_shift + 1);
+      confidence.at<float>(r0, c0) = (windows_square == 0) ? 0
+                                                           : sum_e / windows_square - min_e;
+      CV_Assert(confidence.at<float>(r0, c0) >= 0);
+    }
+  }
+}
 
-          const int window_top_shift = min(r0, r0 + u + u0, averaging_radius);
-          const int window_bottom_shift = min(rows - 1 - r0, 
-                                              rows - 1 - (r0 + u + u0), 
-                                              averaging_radius);
-          const int window_left_shift = min(c0, c0 + v + v0, averaging_radius);
-          const int window_right_shift = min(cols - 1 - c0, 
-                                             cols - 1 - (c0 + v + v0), 
-                                             averaging_radius);
-
-          const Range prev_row_range(r0 - window_top_shift, r0 + window_bottom_shift + 1);
-          const Range prev_col_range(c0 - window_left_shift, c0 + window_right_shift + 1);
-
-          const Range next_row_range(r0 + u0 + u - window_top_shift, 
-                                     r0 + u0 + u + window_bottom_shift + 1);
-          const Range next_col_range(c0 + v0 + v - window_left_shift, 
-                                     c0 + v0 + v + window_right_shift + 1); 
-         
-                     
-          Mat diff2;
-          Mat w;
-          float w_sum;
-          if (window_top_shift == averaging_radius &&
-              window_bottom_shift == averaging_radius &&
-              window_left_shift == averaging_radius &&
-              window_right_shift == averaging_radius) {
-            w = w_full_window;
-            w_sum = w_full_window_sum;
-            diff2 = diff_storage;
-            dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
-          } else {
-            diff2 = diff_storage(Range(averaging_radius - window_top_shift, 
-                                       averaging_radius + 1 + window_bottom_shift),
-                                 Range(averaging_radius - window_left_shift,
-                                       averaging_radius + 1 + window_right_shift));
-            
-            dist(prev(prev_row_range, prev_col_range), next(next_row_range, next_col_range), diff2);
-            w = w_full_window(Range(averaging_radius - window_top_shift, 
-                                    averaging_radius + 1 + window_bottom_shift),
-                              Range(averaging_radius - window_left_shift,
-                                    averaging_radius + 1 + window_right_shift));
-            w_sum = sum(w)[0];
+static void calcOpticalFlowSingleScaleSF(const Mat& prev_extended,
+                                         const Mat& next_extended,
+                                         const Mat& mask,
+                                         Mat& flow,
+                                         int averaging_radius, 
+                                         int max_flow,
+                                         float sigma_dist,
+                                         float sigma_color) {
+  const int averaging_radius_2 = averaging_radius << 1;
+  const int rows = prev_extended.rows - averaging_radius_2;
+  const int cols = prev_extended.cols - averaging_radius_2;
+  
+  Mat weight_window(averaging_radius_2 + 1, averaging_radius_2 + 1, CV_32F);
+  Mat space_weight_window(averaging_radius_2 + 1, averaging_radius_2 + 1, CV_32F);
+
+  wd(space_weight_window, averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_dist);
+
+  for (int r0 = 0; r0 < rows; ++r0) {
+    for (int c0 = 0; c0 < cols; ++c0) {
+      if (!mask.at<uchar>(r0, c0)) {
+        continue;
+      }
+
+      // TODO: do smth with this creepy staff
+      Vec2f flow_at_point = flow.at<Vec2f>(r0, c0);
+      int u0 = floor(flow_at_point[0] + 0.5);
+      if (r0 + u0 < 0) { u0 = -r0; }
+      if (r0 + u0 >= rows) { u0 = rows - 1 - r0; }
+      int v0 = floor(flow_at_point[1] + 0.5);
+      if (c0 + v0 < 0) { v0 = -c0; }
+      if (c0 + v0 >= cols) { v0 = cols - 1 - c0; }
+
+      const int top_row_shift = -min(r0 + u0, max_flow);
+      const int bottom_row_shift = min(rows - 1 - (r0 + u0), max_flow);
+      const int left_col_shift = -min(c0 + v0, max_flow);
+      const int right_col_shift = min(cols - 1 - (c0 + v0), max_flow);
+
+      float min_cost = DBL_MAX, best_u = u0, best_v = v0;
+
+      wc(prev_extended, weight_window, r0 + averaging_radius, c0 + averaging_radius,
+         averaging_radius, averaging_radius, averaging_radius, averaging_radius, sigma_color);
+      multiply(weight_window, space_weight_window, weight_window);
+
+      const int prev_extended_top_window_row = r0;
+      const int prev_extended_left_window_col = c0;
+
+      for (int u = top_row_shift; u <= bottom_row_shift; ++u) {
+        const int next_extended_top_window_row = r0 + u0 + u;
+        for (int v = left_col_shift; v <= right_col_shift; ++v) {
+          const int next_extended_left_window_col = c0 + v0 + v;
+
+          float cost = 0;
+          for (int r = 0; r <= averaging_radius_2; ++r) {
+            const Vec3b *prev_extended_window_row = prev_extended.ptr<Vec3b>(prev_extended_top_window_row + r);
+            const Vec3b *next_extended_window_row = next_extended.ptr<Vec3b>(next_extended_top_window_row + r);
+            const float* weight_window_row = weight_window.ptr<float>(r);
+            for (int c = 0; c <= averaging_radius_2; ++c) {
+              cost += weight_window_row[c] * 
+                           dist(prev_extended_window_row[prev_extended_left_window_col + c], 
+                                next_extended_window_row[next_extended_left_window_col + c]);
+            }
           }
-          multiply(diff2, w, diff2);
-      
-          const float cost = sum(diff2)[0] / w_sum;
+          // cost should be divided by sum(weight_window), but because 
+          // we interested only in min(cost) and sum(weight_window) is constant
+          // for every point - we remove it
+
           if (cost < min_cost) {
             min_cost = cost;
             best_u = u + u0;
@@ -267,14 +264,7 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev,
           }
         }
       }
-      int windows_square = (max_row_shift - min_row_shift + 1) *
-                           (max_col_shift - min_col_shift + 1);
-      confidence.at<float>(r0, c0) = (windows_square == 0) ? 0
-                                                           : sum_e / windows_square - min_e;
-      CV_Assert(confidence.at<float>(r0, c0) >= 0); // TODO: remove it after testing
-      if (mask.at<uchar>(r0, c0)) {
-        flow.at<Vec2f>(r0, c0) = Vec2f(best_u, best_v);
-      }
+      flow.at<Vec2f>(r0, c0) = Vec2f(best_u, best_v);
     }
   }
 }
@@ -474,22 +464,22 @@ static void buildPyramidWithResizeMethod(Mat& src,
   }
 }
 
-void calcOpticalFlowSF(Mat& from, 
-                       Mat& to, 
-                       Mat& resulted_flow,
-                       int layers,
-                       int averaging_block_size, 
-                       int max_flow,
-                       double sigma_dist,
-                       double sigma_color,
-                       int postprocess_window, 
-                       double sigma_dist_fix, 
-                       double sigma_color_fix,
-                       double occ_thr,
-                       int upscale_averaging_radius,
-                       double upscale_sigma_dist,
-                       double upscale_sigma_color,
-                       double speed_up_thr) {
+CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, 
+                                    Mat& to, 
+                                    Mat& resulted_flow,
+                                    int layers,
+                                    int averaging_radius, 
+                                    int max_flow,
+                                    double sigma_dist,
+                                    double sigma_color,
+                                    int postprocess_window, 
+                                    double sigma_dist_fix, 
+                                    double sigma_color_fix,
+                                    double occ_thr,
+                                    int upscale_averaging_radius,
+                                    double upscale_sigma_dist,
+                                    double upscale_sigma_color,
+                                    double speed_up_thr) {
   vector<Mat> pyr_from_images;
   vector<Mat> pyr_to_images;
 
@@ -498,34 +488,43 @@ void calcOpticalFlowSF(Mat& from,
 
   CV_Assert((int)pyr_from_images.size() == layers && (int)pyr_to_images.size() == layers);
 
-  Mat first_from_image = pyr_from_images[layers - 1];
-  Mat first_to_image = pyr_to_images[layers - 1];
+  Mat curr_from, curr_to, prev_from, prev_to;
+  Mat curr_from_extended, curr_to_extended;
+
+  curr_from = pyr_from_images[layers - 1];
+  curr_to = pyr_to_images[layers - 1];
+
+  copyMakeBorder(curr_from, curr_from_extended, 
+                 averaging_radius, averaging_radius, averaging_radius, averaging_radius,
+                 BORDER_DEFAULT);
+  copyMakeBorder(curr_to, curr_to_extended, 
+                 averaging_radius, averaging_radius, averaging_radius, averaging_radius,
+                 BORDER_DEFAULT);
 
-  Mat mask = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
-  Mat mask_inv = Mat::ones(first_from_image.rows, first_from_image.cols, CV_8U);
+  Mat mask = Mat::ones(curr_from.size(), CV_8U);
+  Mat mask_inv = Mat::ones(curr_from.size(), CV_8U);
 
-  Mat flow(first_from_image.rows, first_from_image.cols, CV_32FC2);
-  Mat flow_inv(first_to_image.rows, first_to_image.cols, CV_32FC2);
+  Mat flow(curr_from.size(), CV_32FC2);
+  Mat flow_inv(curr_to.size(), CV_32FC2);
 
   Mat confidence;
   Mat confidence_inv;
 
-  calcOpticalFlowSingleScaleSF(first_from_image, 
-                               first_to_image, 
+
+  calcOpticalFlowSingleScaleSF(curr_from_extended,
+                               curr_to_extended,
                                mask,
                                flow,
-                               confidence,
-                               averaging_block_size, 
+                               averaging_radius, 
                                max_flow, 
                                sigma_dist, 
                                sigma_color);
 
-  calcOpticalFlowSingleScaleSF(first_to_image, 
-                               first_from_image, 
+  calcOpticalFlowSingleScaleSF(curr_to_extended,
+                               curr_from_extended,
                                mask_inv,
                                flow_inv,
-                               confidence_inv,
-                               averaging_block_size, 
+                               averaging_radius, 
                                max_flow, 
                                sigma_dist, 
                                sigma_color);
@@ -540,14 +539,21 @@ void calcOpticalFlowSF(Mat& from,
                    occ_thr,
                    confidence_inv);
 
-  Mat speed_up = Mat::zeros(first_from_image.rows, first_from_image.cols, CV_8U);
-  Mat speed_up_inv = Mat::zeros(first_from_image.rows, first_from_image.cols, CV_8U);
+  Mat speed_up = Mat::zeros(curr_from.size(), CV_8U);
+  Mat speed_up_inv = Mat::zeros(curr_from.size(), CV_8U);
 
   for (int curr_layer = layers - 2; curr_layer >= 0; --curr_layer) {
-    const Mat curr_from = pyr_from_images[curr_layer];
-    const Mat curr_to = pyr_to_images[curr_layer];
-    const Mat prev_from = pyr_from_images[curr_layer + 1];
-    const Mat prev_to = pyr_to_images[curr_layer + 1];
+    curr_from = pyr_from_images[curr_layer];
+    curr_to = pyr_to_images[curr_layer];
+    prev_from = pyr_from_images[curr_layer + 1];
+    prev_to = pyr_to_images[curr_layer + 1];
+
+    copyMakeBorder(curr_from, curr_from_extended, 
+                   averaging_radius, averaging_radius, averaging_radius, averaging_radius,
+                   BORDER_DEFAULT);
+    copyMakeBorder(curr_to, curr_to_extended,
+                   averaging_radius, averaging_radius, averaging_radius, averaging_radius,
+                   BORDER_DEFAULT);
 
     const int curr_rows = curr_from.rows;
     const int curr_cols = curr_from.cols;
@@ -555,7 +561,7 @@ void calcOpticalFlowSF(Mat& from,
     Mat new_speed_up, new_speed_up_inv;
 
     selectPointsToRecalcFlow(flow,
-                             averaging_block_size,
+                             averaging_radius,
                              speed_up_thr,
                              curr_rows,
                              curr_cols,
@@ -564,7 +570,7 @@ void calcOpticalFlowSF(Mat& from,
                              mask);
 
     selectPointsToRecalcFlow(flow_inv,
-                             averaging_block_size,
+                             averaging_radius,
                              speed_up_thr,
                              curr_rows,
                              curr_cols,
@@ -593,22 +599,22 @@ void calcOpticalFlowSF(Mat& from,
                                   upscale_sigma_dist,
                                   upscale_sigma_color);
 
-    calcOpticalFlowSingleScaleSF(curr_from, 
-                                 curr_to, 
+    calcConfidence(curr_from, curr_to, flow, confidence, max_flow);
+    calcOpticalFlowSingleScaleSF(curr_from_extended,
+                                 curr_to_extended,
                                  mask,
                                  flow,
-                                 confidence,
-                                 averaging_block_size, 
+                                 averaging_radius, 
                                  max_flow, 
                                  sigma_dist, 
                                  sigma_color);
 
-    calcOpticalFlowSingleScaleSF(curr_to,
-                                 curr_from, 
+    calcConfidence(curr_to, curr_from, flow_inv, confidence_inv, max_flow);
+    calcOpticalFlowSingleScaleSF(curr_to_extended,
+                                 curr_from_extended,
                                  mask_inv,
                                  flow_inv,
-                                 confidence_inv,
-                                 averaging_block_size, 
+                                 averaging_radius, 
                                  max_flow, 
                                  sigma_dist, 
                                  sigma_color);
@@ -616,11 +622,12 @@ void calcOpticalFlowSF(Mat& from,
     extrapolateFlow(flow, speed_up);
     extrapolateFlow(flow_inv, speed_up_inv);
 
+    //TODO: should we remove occlusions for the last stage?
     removeOcclusions(flow, flow_inv, occ_thr, confidence);
     removeOcclusions(flow_inv, flow, occ_thr, confidence_inv);
   }
 
-  crossBilateralFilter(flow, pyr_from_images[0], confidence, flow, 
+  crossBilateralFilter(flow, curr_from, confidence, flow, 
                        postprocess_window, sigma_color_fix, sigma_dist_fix);
 
   GaussianBlur(flow, flow, Size(3, 3), 5);
diff --git a/modules/video/src/simpleflow.hpp b/modules/video/src/simpleflow.hpp
index c4aa02355a..94b92520e9 100644
--- a/modules/video/src/simpleflow.hpp
+++ b/modules/video/src/simpleflow.hpp
@@ -51,6 +51,22 @@ using namespace std;
 #define UNKNOWN_FLOW_THRESH 1e9
 
 namespace cv {
+/*
+template<class T>
+inline static T sqr(T t) {
+  return t*t;
+}
+
+static float dist(const Vec3b& p1, const Vec3b& p2) {
+  return sqr(p1[0] - p2[0]) +
+         sqr(p1[1] - p2[1]) +
+         sqr(p1[2] - p2[2]);
+}
+
+inline static float dist(const Vec2f& p1, const Vec2f& p2) {
+  return sqr(p1[0] - p2[0]) +
+         sqr(p1[1] - p2[1]);
+}*/
 
 inline static float dist(const Vec3b& p1, const Vec3b& p2) {
   return (p1[0] - p2[0]) * (p1[0] - p2[0]) +

From 9d1aa37e214485e18793df09ad473dc76d8c548c Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Wed, 5 Sep 2012 14:19:44 +0400
Subject: [PATCH 5/7] Small fixes

---
 modules/video/src/simpleflow.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index 7f327b17d9..82b8e50109 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -128,11 +128,11 @@ static void crossBilateralFilter(const Mat& image,
 
       multiply(weights, confidence_extended(window_rows, window_cols), weights);
       multiply(weights, weights_space, weights);
-      float weights_sum = sum(weights)[0];
+      float weights_sum = (float)sum(weights)[0];
 
       for (int ch = 0; ch < 2; ++ch) {
         multiply(weights, image_extended_channels[ch](window_rows, window_cols), weighted_sum);
-        float total_sum = sum(weighted_sum)[0];
+        float total_sum = (float)sum(weighted_sum)[0];
 
         dst.at<Vec2f>(row, col)[ch] = (flag && fabs(weights_sum) < 1e-9) 
           ? image.at<float>(row, col) 
@@ -154,10 +154,10 @@ static void calcConfidence(const Mat& prev,
   for (int r0 = 0; r0 < rows; ++r0) {
     for (int c0 = 0; c0 < cols; ++c0) {
       Vec2f flow_at_point = flow.at<Vec2f>(r0, c0);
-      int u0 = floor(flow_at_point[0] + 0.5);
+      int u0 = cvRound(flow_at_point[0]);
       if (r0 + u0 < 0) { u0 = -r0; }
       if (r0 + u0 >= rows) { u0 = rows - 1 - r0; }
-      int v0 = floor(flow_at_point[1] + 0.5);
+      int v0 = cvRound(flow_at_point[1]);
       if (c0 + v0 < 0) { v0 = -c0; }
       if (c0 + v0 >= cols) { v0 = cols - 1 - c0; }
 
@@ -531,12 +531,12 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
 
   removeOcclusions(flow, 
                    flow_inv,
-                   occ_thr,
+                   (float)occ_thr,
                    confidence);
 
   removeOcclusions(flow_inv, 
                    flow,
-                   occ_thr,
+                   (float)occ_thr,
                    confidence_inv);
 
   Mat speed_up = Mat::zeros(curr_from.size(), CV_8U);
@@ -606,8 +606,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                                  flow,
                                  averaging_radius, 
                                  max_flow, 
-                                 sigma_dist, 
-                                 sigma_color);
+                                 (float)sigma_dist, 
+                                 (float)sigma_color);
 
     calcConfidence(curr_to, curr_from, flow_inv, confidence_inv, max_flow);
     calcOpticalFlowSingleScaleSF(curr_to_extended,
@@ -616,8 +616,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                                  flow_inv,
                                  averaging_radius, 
                                  max_flow, 
-                                 sigma_dist, 
-                                 sigma_color);
+                                 (float)sigma_dist, 
+                                 (float)sigma_color);
 
     extrapolateFlow(flow, speed_up);
     extrapolateFlow(flow_inv, speed_up_inv);
@@ -628,7 +628,7 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
   }
 
   crossBilateralFilter(flow, curr_from, confidence, flow, 
-                       postprocess_window, sigma_color_fix, sigma_dist_fix);
+                       postprocess_window, (float)sigma_color_fix, (float)sigma_dist_fix);
 
   GaussianBlur(flow, flow, Size(3, 3), 5);
 

From 036258b8da70c027ae3c5756a5dc7ba8e45f35ac Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Wed, 5 Sep 2012 14:36:40 +0400
Subject: [PATCH 6/7] Small fixed #2

---
 modules/video/src/simpleflow.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index 82b8e50109..d1471307dc 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -259,8 +259,8 @@ static void calcOpticalFlowSingleScaleSF(const Mat& prev_extended,
 
           if (cost < min_cost) {
             min_cost = cost;
-            best_u = u + u0;
-            best_v = v + v0;
+            best_u = (float)(u + u0);
+            best_v = (float)(v + v0);
           }
         }
       }
@@ -309,7 +309,7 @@ static Mat calcIrregularityMat(const Mat& flow, int radius) {
 
 static void selectPointsToRecalcFlow(const Mat& flow, 
                                      int irregularity_metric_radius,
-                                     int speed_up_thr,
+                                     float speed_up_thr,
                                      int curr_rows,
                                      int curr_cols,
                                      const Mat& prev_speed_up,
@@ -562,7 +562,7 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
 
     selectPointsToRecalcFlow(flow,
                              averaging_radius,
-                             (int)speed_up_thr,
+                             speed_up_thr,
                              curr_rows,
                              curr_cols,
                              speed_up,
@@ -586,8 +586,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                               prev_from,
                               confidence,
                               flow, 
-                              upscale_averaging_radius,
-                              upscale_sigma_dist,
+                              (float)upscale_averaging_radius,
+                              (float)upscale_sigma_dist,
                               upscale_sigma_color);
 
     flow_inv = upscaleOpticalFlow(curr_rows,
@@ -596,8 +596,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                                   confidence_inv,
                                   flow_inv,
                                   upscale_averaging_radius,
-                                  upscale_sigma_dist,
-                                  upscale_sigma_color);
+                                  (float)upscale_sigma_dist,
+                                  (float)upscale_sigma_color);
 
     calcConfidence(curr_from, curr_to, flow, confidence, max_flow);
     calcOpticalFlowSingleScaleSF(curr_from_extended,
@@ -623,8 +623,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
     extrapolateFlow(flow_inv, speed_up_inv);
 
     //TODO: should we remove occlusions for the last stage?
-    removeOcclusions(flow, flow_inv, occ_thr, confidence);
-    removeOcclusions(flow_inv, flow, occ_thr, confidence_inv);
+    removeOcclusions(flow, flow_inv, (float)occ_thr, confidence);
+    removeOcclusions(flow_inv, flow, (float)occ_thr, confidence_inv);
   }
 
   crossBilateralFilter(flow, curr_from, confidence, flow, 

From 66ce62cd0dc66f0d86580ecd7d20bfb7a990a86d Mon Sep 17 00:00:00 2001
From: Yury Zemlyanskiy <yuri.zemlyanskiy@gmail.com>
Date: Wed, 5 Sep 2012 14:39:33 +0400
Subject: [PATCH 7/7] Small fixed #3

---
 modules/video/src/simpleflow.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/video/src/simpleflow.cpp b/modules/video/src/simpleflow.cpp
index d1471307dc..af99fc5f25 100644
--- a/modules/video/src/simpleflow.cpp
+++ b/modules/video/src/simpleflow.cpp
@@ -517,8 +517,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                                flow,
                                averaging_radius, 
                                max_flow, 
-                               sigma_dist, 
-                               sigma_color);
+                               (float)sigma_dist, 
+                               (float)sigma_color);
 
   calcOpticalFlowSingleScaleSF(curr_to_extended,
                                curr_from_extended,
@@ -526,8 +526,8 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                                flow_inv,
                                averaging_radius, 
                                max_flow, 
-                               sigma_dist, 
-                               sigma_color);
+                               (float)sigma_dist, 
+                               (float)sigma_color);
 
   removeOcclusions(flow, 
                    flow_inv,
@@ -586,9 +586,9 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from,
                               prev_from,
                               confidence,
                               flow, 
-                              (float)upscale_averaging_radius,
+                              upscale_averaging_radius,
                               (float)upscale_sigma_dist,
-                              upscale_sigma_color);
+                              (float)upscale_sigma_color);
 
     flow_inv = upscaleOpticalFlow(curr_rows,
                                   curr_cols,